1 Préparation, import des données et filtre

1.1 Chargement des packages, import des données

On commence par charger quelques packages, dont phyloseq et DESeq2

#install.packages("knitr")
library(knitr)
library(phyloseq)
library(DT)
library(VennDiagram)
library(tidyverse)
library(vegan)
library(reshape2)
library(permute)
library(lattice)
library(phangorn)
library(parallel)
library(gridBase)
library(biomformat)
library(plotly)
library(ggpubr)
library(openxlsx)
library(tidyr)
library(paletteer)
library(viridis)
library(dplyr)
library(gridExtra)

library(tidyr)

library(ggplot2)

library(plotly)
if (Sys.info()["user"] == 'mmariadasso') {
  source("~/Documents/Custom_Functions.R")  
} else {
  source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R")
}
## Custom package for extra graphical functions
# remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
## Custom package to explore multi-affiliated taxa
# remotes::install_github("mahendra-mariadassou/affiliationExplorer")
# affiliationExplorer::run_app()
library(phyloseq.extended)

Puis on importe les données

frogsBiom <- "/Users/NIPPONNOYE/Documents/Teletravail_20201215/Biom Template Script/LECIMETA.biom1"
#frogsBiom <- "C:/Users/mmonnoye/Documents/Biom/LECIMETA/LECIMETA.biom1"
frogs.data.biom <- import_frogs(frogsBiom, taxMethod = "blast")

metadata<-read.table("/Users/NIPPONNOYE/Documents/Teletravail_20201215/Biom Template Script/SamplenameLECIMETA.txt",row.names=1, header=T)
#metadata<-read.table("C:/Users/mmonnoye/Documents/Biom/LECIMETA/SamplenameLECIMETA.txt",row.names=1, header=T)

sample_data(frogs.data.biom)<-metadata
sample_names(frogs.data.biom) <- get_variable(frogs.data.biom, "Name")
frogs.data<-frogs.data.biom

treefile<- read.tree("/Users/NIPPONNOYE/Documents/Teletravail_20201215/Biom Template Script/LECIMETA_tree.nhx")
#treefile<- read.tree("C:/Users/mmonnoye/Documents/Biom/LECIMETA/LECIMETA_tree.nhx") #if have treefile
phy_tree(frogs.data)    <- treefile

## Before correction / visualisation affichage noms taxonomic
#tax_table(frogs.data)[1, ]

tax_table(frogs.data) <- gsub("\"", "", tax_table(frogs.data))
tax_table(frogs.data) <- gsub("unknown.*", "Unknown", tax_table(frogs.data))

## After correction / visualisation affichage noms taxonomic
#tax_table(frogs.data)[1, ]

On dispose de l’object phyloseq suivant:

frogs.data
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 270 taxa and 107 samples ]
## sample_data() Sample Data:       [ 107 samples by 5 sample variables ]
## tax_table()   Taxonomy Table:    [ 270 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 270 tips and 269 internal nodes ]

qui contient les métadonnées suivantes:

sample_variables(frogs.data)
## [1] "Name"       "Reads"      "Group"      "Time_point" "Groupe"

1.2 Chargement d’un fichier excel

# data <- readxl::read_excel("/Users/NIPPONNOYE/Documents/Teletravail_20201201/ODORANTS/Graph Mahendra juillet 2019/Tableau de synthese preference odeurs.xlsx") %>%
#   mutate(ID = factor(ID))
# # Affichage du tableau
# DT::datatable(data, class = "compact")

1.3 Selection des echantillons / Analyses à TF / Sans les groupes Chow et HFPalm

## select only samples which read count higher than 10000 
# subset_samples(frogs.data, sample_sums(frogs.data) > 10000 & Time_point == "TF")

## Remove samples T2_8, T2_68
# subset_samples(frogs.data, !(sample_names(frogs.data) %in% c("T2_8", "T2_68")) )

#frogs.data <- subset_samples(frogs.data, Time_point == "TF")

 frogs.data <- subset_samples(frogs.data, Time_point == "TF" & Group %in% c("A","B","C","D"))
 
# frogs.data <- subset_samples(frogs.data, (Type == "A/J") & (Statut == "HFD") & (Period %in% c("T0", "T4", "Tf")))

# ## On renomme la variable utilisee pour l'abondance differentielle (ici "Type") en "Groupe"
# sample_data(data)$Groupe <- sample_data(data)$Type

Le “Group” correspond aux 6 groupes de l’étude: - Chow

  • High fat control faible en ALA (acide alpha-linoléinique ; la partie alipidique des régimes HF étant à base d’amidon, caséines…)

  • High fat avec ALA uniquement via des huiles (tout l’ALA est apporté par des TG)

  • High fat avec ALA, dont les lipides incluent 10% de lécithine de colza (ainsi vectrice d’~10% de l’ALA contenu dans le régime, sous forme de PL)

  • High fat avec ALA, dont les lipides incluent 20% de lécithine de colza ( ainsi vectrice d’~20% de l’ALA contenu dans le régime, sous forme de PL )

  • High fat avec ALA, dont les lipides incluent 10% de lécithine de soja ( ainsi vectrice d’~20% de l’ALA contenu dans le régime, sous forme de PL )

table(get_variable(frogs.data, "Group"))
## 
## A B C D 
## 6 6 6 6

Le “Groupe” regroupe les échantillons par type de régime et par temps

sample_data(frogs.data) %>% as("data.frame") %>% 
  count(Group, Time_point)
##   Group Time_point n
## 1     A         TF 6
## 2     B         TF 6
## 3     C         TF 6
## 4     D         TF 6
# table(get_variable(frogs.data, "Groupe"))

On compte le nombre de reads dans chaque type de régime et à chaque point de temps:

sample_data(frogs.data) %>% as("data.frame") %>% 
  count(Group, Time_point, wt = Reads, name = "Total_read_count")
##   Group Time_point Total_read_count
## 1     A         TF           221967
## 2     B         TF           204860
## 3     C         TF           188544
## 4     D         TF           195215

Le “Time_point” correspond aux temps: T0, T2 et TF

table(get_variable(frogs.data, "Time_point"))
## 
## TF 
## 24

Structure de la table d’OTU (exemple pour 6 OTUs):

#Count phyla by sample
head(otu_table(frogs.data))
## OTU Table:          [6 taxa and 24 samples]
##                      taxa are rows
##            TF_25 TF_26 TF_27 TF_31 TF_32 TF_33 TF_37 TF_38 TF_39 TF_43 TF_44
## Cluster_30    49    23   467   137    93    58   114   105   131    76    52
## Cluster_42     0     0     1     0     0     0     4     0     6     0     8
## Cluster_8      1     0     2     0     1     0     5     8    47     1    67
## Cluster_28   110    57   527    95   201    98   214   156   128   122    57
## Cluster_15    92    72   106   352   102   116   183   140   412   126   112
## Cluster_12    26    14    40    12    23    17   219   434   567   353   258
##            TF_45 TF_49 TF_50 TF_51 TF_55 TF_56 TF_57 TF_61 TF_62 TF_63 TF_67
## Cluster_30    39    69    75    97    17    48    48    41    17    96    22
## Cluster_42     2     0     4     0     8     1     3     0     5     1     1
## Cluster_8      2     5    10     0    27     3     3     0     9     9     1
## Cluster_28    55    60    81    86    24    79    73    54    23    79    31
## Cluster_15   104    85    98    49    55    55    68    90   162    63    11
## Cluster_12   115   436   868   283   287   851  1521   612   256   244    85
##            TF_68 TF_69
## Cluster_30    41    38
## Cluster_42     3     2
## Cluster_8      2     2
## Cluster_28    33    32
## Cluster_15    36    54
## Cluster_12    90   285
#ou ceci, les 5 premiers échantillons
#phyloseq::otu_table(frogs.data)[1:5, 1:5] 

Structure Taxonomy Table (exemple pour 6 OTUs):

head(tax_table(frogs.data))
## Taxonomy Table:     [6 taxa by 7 taxonomic ranks]:
##            Kingdom    Phylum          Class         Order          
## Cluster_30 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Cluster_42 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Cluster_8  "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Cluster_28 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Cluster_15 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Cluster_12 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
##            Family           Genus     Species  
## Cluster_30 "Muribaculaceae" "Unknown" "Unknown"
## Cluster_42 "Muribaculaceae" "Unknown" "Unknown"
## Cluster_8  "Muribaculaceae" "Unknown" "Unknown"
## Cluster_28 "Muribaculaceae" "Unknown" "Unknown"
## Cluster_15 "Muribaculaceae" "Unknown" "Unknown"
## Cluster_12 "Muribaculaceae" "Unknown" "Unknown"

Structure tableau des metadonnées:

head(sample_data(frogs.data))
##        Name Reads Group Time_point Groupe
## TF_25 TF_25 34511     A         TF   TF_A
## TF_26 TF_26 36503     A         TF   TF_A
## TF_27 TF_27 42363     A         TF   TF_A
## TF_31 TF_31 33042     A         TF   TF_A
## TF_32 TF_32 42068     A         TF   TF_A
## TF_33 TF_33 33480     A         TF   TF_A

1.4 Visualisation de l’abondance par échantillon

1.4.1 Niveau Phylum

p<-plot_bar(frogs.data)
plot(p)

#ggplotly(p)  #pour avoir figure en html
p <- plot_bar(frogs.data, fill = "Phylum")
plot(p)

1.4.2 Niveau Family

#Plot des echantillons couleurs par Family             
p <- plot_bar(frogs.data, fill = "Family")
plot(p)

1.5 Filtres

On va commencer par filtrer les OTUs sur la base de leur prévalence: on ne garde que les OTUs qui sont présents dans au moins 50% des échantillons d’au moins un groupe.

# test_function <- function(x) { x > 0 }
# taxa.1 <- genefilter_sample(subset_samples(frogs.data, Group == "Chow"), test_function, A = 3)
# taxa.2 <- genefilter_sample(subset_samples(frogs.data, Group == "HFPalm"), test_function, A = 3)
# taxa.3 <- genefilter_sample(subset_samples(frogs.data, Group == "A"), test_function, A = 3)
# taxa.4 <- genefilter_sample(subset_samples(frogs.data, Group == "B"), test_function, A = 3)
# taxa.5 <- genefilter_sample(subset_samples(frogs.data, Group == "C"), test_function, A = 3)
# taxa.6 <- genefilter_sample(subset_samples(frogs.data, Group == "D"), test_function, A = 3)
# venn.plot <- venn.diagram(list("A"  = which(taxa.1),
#                                "B" = which(taxa.2),"C" = which(taxa.3),
#                               "D"= which(taxa.4)),
#                         filename = NULL, fill = c("#FE642E", "#F7D358", "#5FB404", "#0489B1"), lty = "blank",cex = 1.5, cat.cex = 1.5, cat.col = c("#FE642E", "#F7D358", "#5FB404", "#0489B1"))
# grid.newpage(); grid.draw(venn.plot)

Si pas de venn diagramm après le filtre :

prevalence_data <- estimate_prevalence(physeq = frogs.data, group = "Group")
## Keep only OTUs with prevalence > 0.5 in at least one group
taxa_to_keep <- filter(prevalence_data, prevalence >= 0.5) %>% pull(otu)

2 Composition des communautés

2.1 Représentation des communautés au niveau Phylum

Répartition des OTU par phylum

#Get count of phyla
table(phyloseq::tax_table(frogs.data)[, "Phylum"])
## 
##  Bacteroidetes     Firmicutes Proteobacteria    Tenericutes 
##             32            233              4              1

Boxplot Abondance

correct.order <- c("A", "B","C", "D")
sample_data(frogs.data)$Group <- factor(sample_data(frogs.data)$Group,
levels = correct.order)

phy<-tax_glom(frogs.data,taxrank="Phylum")
plotdata<-psmelt(phy)
p<-ggplot(plotdata,aes(x = Group,y=Abundance,fill=Group)) + geom_boxplot(aes(group=Group))+facet_wrap(~Phylum, scales = "free_y",ncol = 1)
                                           #,color=Group 
plot(p)                                       

Relative Abondance par échantillon

#install.packages("openxlsx") 
#library("openxlsx")

#Convert to relative abundance
data_rel_abund = phyloseq::transform_sample_counts(frogs.data, function(x) 100 * x/sum(x)) 

#Agglomerate to phylum-level and rename
data_phylum <- phyloseq::tax_glom(data_rel_abund, "Phylum")

phyloseq::taxa_names(data_phylum) <- phyloseq::tax_table(data_phylum)[, "Phylum"]
phyloseq::otu_table(data_phylum)
## OTU Table:          [4 taxa and 24 samples]
##                      taxa are rows
##                   TF_25    TF_26     TF_27    TF_31     TF_32    TF_33
## Bacteroidetes  30.88340 31.40680 56.022300 36.85098 34.657553 44.99479
## Firmicutes     39.36064 49.04111 40.399326 38.09747 59.190579 40.27261
## Tenericutes     0.00000  0.00000  0.000000  0.00000  0.000000  0.00000
## Proteobacteria 29.75596 19.55208  3.578374 25.05155  6.151868 14.73261
##                    TF_37     TF_38    TF_39     TF_43     TF_44     TF_45
## Bacteroidetes  52.236341 55.345061 45.31785 63.381344 57.973487 61.417934
## Firmicutes     38.459329 37.993130 41.74556 29.665938 35.057094 28.591374
## Tenericutes     0.000000  0.000000  0.00000  0.000000  0.000000  0.000000
## Proteobacteria  9.304329  6.661809 12.93659  6.952718  6.969419  9.990692
##                    TF_49    TF_50    TF_51    TF_55    TF_56     TF_57
## Bacteroidetes  56.526006 52.35453 59.10607 56.70439 59.81752 67.918455
## Firmicutes     35.852143 36.60549 34.26729 32.63955 30.07169 24.177396
## Tenericutes     0.000000  0.00000  0.00000  0.00000  0.00000  0.000000
## Proteobacteria  7.621851 11.03998  6.62664 10.65605 10.11079  7.904149
##                    TF_61    TF_62     TF_63     TF_67     TF_68    TF_69
## Bacteroidetes  47.398718 34.52678 59.592410 50.155682 57.073735 37.41912
## Firmicutes     46.087147 53.60235 33.573436 42.293721 33.996003 33.27642
## Tenericutes     0.000000  0.00000  0.000000  0.000000  0.000000  0.00000
## Proteobacteria  6.514136 11.87087  6.834153  7.550597  8.930262 29.30446
rel_abun<-phyloseq::otu_table(data_phylum)

#write.xlsx(rel_abun,"Relative Abundance Phylum TF.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data)$Group <- factor(sample_data(frogs.data)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "Groupe"))

p <- plot_composition(frogs.data, "Kingdom", "Bacteria", "Phylum",numberOfTaxa = 5, fill = "Phylum")
p<- p + facet_wrap(~Group, scales = "free_x", nrow = 1)    # + facet_wrap(Groupe~Statut, scales = "free_x", nrow = 1)

plot(p)

Changement de titre / Personnalisation des couleurs / Supprimer noms axe des x

family.palette<- c( "Bacteroidetes"           = "#CBAC45", 
                   "Firmicutes"             = "#3DCC98", 
                   "Proteobacteria"                  = "#48B3E8",
                  "Tenericutes"                = "#F28372")

p <- plot_composition(frogs.data, "Kingdom", "Bacteria", "Phylum",numberOfTaxa = 4, fill = "Phylum")+ ggtitle("V1-V3") +  scale_color_manual(values = family.palette) + scale_fill_manual(values = family.palette)
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1) + theme(axis.text.x = element_blank(),  axis.ticks = element_blank())  
    #p <- p + facet_wrap(Groupe~Statut, scales = "free_x", nrow = 1)

plot(p)

#ggsave(p, file = "/Users/NIPPONNOYE/Documents/Teletravail_20201211/V1V3 _ 8 sample/Phylum Composition.png", width = 6, height = 5,dpi=300)

Test de Kruskal (Anova non paramétrique) pour chaque Phylum pour tester si les abondances sont similaires (pour ce Phylum) entre les différents groupes.

# depth <- sample_sums(frogs.data)[1] 
data.phylum <- frogs.data %>% 
  transform_sample_counts(function(x) { x / sum(x)}) %>% ## transform counts to proportions
  fast_tax_glom(taxrank = "Phylum") %>% 
  psmelt() %>% 
  mutate(Group = factor(Group, labels = c("A", "B","C", "D")))
data.test <- compare_means(Abundance ~ Group, data = data.phylum, method = "kruskal", group.by = "Phylum")
data.test
## # A tibble: 4 x 7
##   Phylum         .y.              p   p.adj p.format p.signif method        
##   <chr>          <chr>        <dbl>   <dbl> <chr>    <chr>    <chr>         
## 1 Bacteroidetes  Abundance   0.0119   0.036 0.012    *        Kruskal-Wallis
## 2 Firmicutes     Abundance   0.0314   0.063 0.031    *        Kruskal-Wallis
## 3 Proteobacteria Abundance   0.780    0.78  0.780    ns       Kruskal-Wallis
## 4 Tenericutes    Abundance NaN      NaN     NA       ?        Kruskal-Wallis

On change les noms de Phylum dans notre objet phyloseq pour y rajouter les étoiles de significativité.

new.phylum <- c("Firmicutes"        = "Firmicutes (*)", 
                "Proteobacteria"    = "Proteobacteria",
                "Bacteroidetes"     = "Bacteroidetes (*)",
                "Tenericutes"       = "Tenericutes"
                )
frogs.data.2 <- frogs.data
tax_table(frogs.data.2)[ , "Phylum"] <- new.phylum[as(tax_table(frogs.data), "matrix")[ , "Phylum"]]
sample_data(frogs.data.2)$Group <- factor(sample_data(frogs.data.2)$Group, labels = c("A", "B","C", "D"))

On va enlever les Tenericutes car seulement 1 OTU.

my.palette <- c("#FC8385", "#71C749", "#55CDD3")
                                                       #Retirer Tenericutes
p1 <- plot_composition(frogs.data.2 %>% subset_taxa(Phylum != "Tenericutes"), 
                      "Kingdom", "Bacteria", "Phylum", fill = "Phylum", facet_grid = "~Group", x = "Name", numberOfTaxa = 6) + 
  scale_fill_manual(name = NULL, values = my.palette) + 
  scale_color_manual(name = NULL, values = my.palette) + 
  theme(legend.position = "bottom", axis.text.x = element_blank())+
  theme(legend.text = element_text(size=12))  #taille texte legende
plot(p1)  

Test Dunn Post-Hoc sur les résultats du Kruskall Wallis de l’impact du group sur l’abondance par Phylum

filter(data.phylum, Phylum == "Bacteroidetes") %>% 
  dunn.test::dunn.test(x = .$Abundance, g = .$Group, method = "bh")
## 
##                            Comparison of x by group                            
##                              (Benjamini-Hochberg)                              
## Col Mean-|
## Row Mean |          A          B          C
## ---------+---------------------------------
##        B |  -2.531139
##          |    0.0171*
##          |
##        C |  -3.021037  -0.489897
##          |    0.0076*     0.3121
##          |
##        D |  -1.306394   1.224744   1.714642
##          |     0.1436     0.1324     0.0864
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
filter(data.phylum, Phylum == "Firmicutes") %>% 
  dunn.test::dunn.test(x = .$Abundance, g = .$Group, method = "bh")
## 
##                            Comparison of x by group                            
##                              (Benjamini-Hochberg)                              
## Col Mean-|
## Row Mean |          A          B          C
## ---------+---------------------------------
##        B |   2.000416
##          |     0.0682
##          |
##        C |   2.816913   0.816496
##          |    0.0145*     0.2071
##          |
##        D |   1.061445  -0.938971  -1.755467
##          |     0.2164     0.2086     0.0792
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
filter(data.phylum, Phylum == "Proteobacteria") %>% 
  dunn.test::dunn.test(x = .$Abundance, g = .$Group, method = "bh")
## 
##                            Comparison of x by group                            
##                              (Benjamini-Hochberg)                              
## Col Mean-|
## Row Mean |          A          B          C
## ---------+---------------------------------
##        B |   0.979795
##          |     0.9816
##          |
##        C |   0.694022  -0.285773
##          |     0.4877     0.5813
##          |
##        D |   0.775671  -0.204124   0.081649
##          |     0.6569     0.5030     0.4675
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2

Relative abondance par groupe

#Convert to relative abundance
data_rel_abund = phyloseq::transform_sample_counts(frogs.data, function(x) 100 * x/sum(x))

#Agglomerate to phylum-level and rename
data_phylum <- phyloseq::tax_glom(data_rel_abund, "Phylum")

ps1 <- merge_samples(data_phylum, "Group")
data_phylum_2 <- transform_sample_counts(ps1, function(x) 100 * x/sum(x))

phyloseq::taxa_names(data_phylum_2) <- phyloseq::tax_table(data_phylum_2)[, "Phylum"]
phyloseq::otu_table(data_phylum_2)
## OTU Table:          [4 taxa and 4 samples]
##                      taxa are columns
##   Bacteroidetes Firmicutes Tenericutes Proteobacteria
## A      39.13597   44.39362           0      16.470406
## B      55.94534   35.25207           0       8.802593
## C      58.73783   32.26893           0       8.993243
## D      47.69441   40.47151           0      11.834080
rel_abun<-phyloseq::otu_table(data_phylum_2)

#write.xlsx(rel_abun,"Relative Abondance Phylum moyenne TF.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")

Boxplot Relative Abundance en %

#Melt and plot
phyloseq::psmelt(data_phylum) %>%
ggplot(data = ., aes(x = Group, y = Abundance,color=Phylum)) +
  geom_boxplot(outlier.shape  = NA) +
  geom_jitter(aes(color = Phylum), height = 0, width = .2) +
  labs(x = "", y = "Relative Abundance (%)") +
  facet_wrap(~ Phylum, scales = "free",ncol = 1)

Retirer Tenericutes

frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)


correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "Groupe"))

p <- plot_composition(frogs.data.merged %>% subset_taxa(Phylum != "Tenericutes"), "Kingdom", "Bacteria", "Phylum",numberOfTaxa = 5, fill = "Phylum")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Phylum Composition TF") ## Changer le titre 

plot(p)

#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)

positions <- c("A", "B","C", "D")

p <- plot_composition(frogs.data.merged, "Kingdom", "Bacteria", "Phylum",numberOfTaxa = 5, fill = "Phylum")
#p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Phylum")+theme(panel.background=element_rect(fill="transparent",colour=NA)) + ## Changer le titre + Changer arrière plan
  #theme(legend.position = "bottom",legend.box = "vertical") 
 theme(legend.title = element_text(color = "black", size = 14),legend.text = element_text(color = "black", size = 10))+ #legende taille noms
     theme(plot.margin=unit(c(0.1,0.1,6,0.1),"cm")) + theme(legend.position=c(0.50,-0.50))+
  # Modifier elements titre et nom des axes
  theme(plot.title = element_text(color="Black", size=16, face="bold.italic",hjust=0.5),
      axis.title.y = element_text(color="black", size=14))+
  # Modifier graduation des axes
    theme(axis.text.x = element_text(face="bold", color="black", size=13),
          axis.text.y = element_text( color="black", size=12))+ #, angle=45, face="bold"
        scale_x_discrete(limits = positions)  # changer ordre bars selon l'argument position
               
plot(p)

#ggsave(p, file = "/Users/NIPPONNOYE/Documents/2020 Boulot Mag/2020 Télétravail/Teletravail_20201001/Phylum.eps", width = 3, height = 6)

Titre et sous titre / Echelle en %

frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilis? pour fusionner les ?chantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)

correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "Groupe"))


p <- plot_composition(frogs.data.merged, "Kingdom", "Bacteria", "Phylum",numberOfTaxa = 6, fill = "Phylum")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)+
      scale_y_continuous(label = scales::percent)
p <- p +  labs(x = "Samples", y = "Relative abundance (%)",
                                   title = "Phylum Composition TF",
                                   subtitle = "6 Top Phylum") 
plot(p)

#ggsave(p, file = "C:/Users/mmonnoye/Documents/Phylum Composition.png", width = 4, height = 4)

Boxplot Phylum avec stats

On va se focaliser sur les 4 Phylum et représenter leurs abondances dans les différents groupe sous forme de boxplot. On va utiliser A comme groupe de référence et le comparer à tout les autres avec un test de wilcoxon pour voir desquels il diffère. Le nombre d’étoiles codent le niveau de significativité, les comparaisons non-significatives ne sont pas indiqués

## Select only some families  
families <- c("Bacteroidetes", "Firmicutes", "Proteobacteria","Tenericutes")
phy <- frogs.data %>% subset_taxa(Phylum %in% families) %>% tax_glom(taxrank="Phylum") #agglomerate at family level
 
 ## Transform count to relative abundances 
depth <- sample_sums(frogs.data)[1] 
plotdata<-psmelt(phy) %>% 
  mutate(Abundance  = Abundance / depth, 
         # Phylum = factor(Phylum, levels = families),  ## set family order to the one specified in `families` rather than the alphabetical one. 
         Group = factor(Group, labels = c("A", "B","C", "D")))  ## change treatment names to the paper ones
  
p <- ggplot(plotdata,aes(x = Group,y=Abundance, color = Group, Group = Group)) + 
  stat_boxplot(geom = "errorbar", width = 0.5) + 
  geom_boxplot(outlier.alpha = 1, 
               outlier.size = 0.8) + 
  facet_wrap(~Phylum, scales = "free_y", ncol = 4) + 
  scale_color_manual(values = c("A" = "#09C51F",   
                                "B" = "#8BE9F9", 
                                "C" = "#0995C5", 
                                "D" = "#094DC5"),
                     guide = "none") + 
  
  # theme_classic() + ## fond blanc
  labs(x = NULL) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 15))

## Compare reference level Chow to all other levels using a wilcoxon test and adjust p-values using the holm correction. 
p <- p + stat_compare_means(aes(label = ..p.signif..), method = "wilcox.test", p.adjust.method = "holm", ref.group = "Chow", hide.ns = T, 
                       label.y.npc = c(0.90),
                       size = 7, 
                       fontface = "bold")
plot(p)

## Représentation des communautés au niveau Family

correct.order <- c("A", "B","C", "D")
sample_data(frogs.data)$Group <- factor(sample_data(frogs.data)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "Groupe"))

p <- plot_composition(frogs.data, "Kingdom", "Bacteria", "Family",numberOfTaxa = 12, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)

plot(p)

Relative abondance par Family

#Convert to relative abundance
data_rel_abund = phyloseq::transform_sample_counts(frogs.data, function(x) 100 * x/sum(x))

#Agglomerate to phylum-level and rename
data_phylum <- phyloseq::tax_glom(data_rel_abund, "Family")

ps1 <- merge_samples(data_phylum, "Group")
data_phylum_2 <- transform_sample_counts(ps1, function(x) 100 * x/sum(x))

phyloseq::taxa_names(data_phylum_2) <- phyloseq::tax_table(data_phylum_2)[, "Family"]
phyloseq::otu_table(data_phylum_2)
## OTU Table:          [19 taxa and 4 samples]
##                      taxa are columns
##   Muribaculaceae Tannerellaceae Prevotellaceae Bacteroidaceae Marinifilaceae
## A       7.511593      0.6872592       1.102289       5.813509       2.485080
## B      11.859320      1.5912746       3.958224       5.052995       5.560867
## C      16.188255      0.4286485       6.370489       6.410393       4.329306
## D       9.751098      0.3719997       4.922359       5.455489       4.045521
##   Rikenellaceae Lachnospiraceae Peptostreptococcaceae Family XIII
## A      21.53624        27.99114             0.1150514  0.13689625
## B      27.92266        21.97917             0.1359102  0.09446598
## C      25.01074        22.17903             0.1403283  0.04089012
## D      23.14794        28.37958             0.5832495  0.08085836
##   Clostridiaceae 1 Anaeroplasmataceae Erysipelotrichaceae Lactobacillaceae
## A       0.03496661                  0           2.8539398        0.3516066
## B       0.04968114                  0           0.3481496        0.7941359
## C       0.17255032                  0           0.2781379        0.5096389
## D       0.08402799                  0           0.6920717        2.0834285
##   Christensenellaceae Ruminococcaceae Clostridiales vadinBB60 group
## A         0.006407880       12.876443                    0.02717136
## B         0.025499817       10.315114                    1.50994356
## C         0.028894354        7.089756                    1.82969743
## D         0.009367567        7.651599                    0.90732859
##   Desulfovibrionaceae Burkholderiaceae Enterobacteriaceae
## A           15.886803       0.53515495         0.04844804
## B            8.637055       0.10078781         0.06475014
## C            8.839571       0.08640673         0.06726520
## D           11.506948       0.16843367         0.15869848
rel_abun<-phyloseq::otu_table(data_phylum_2)

#write.xlsx(rel_abun,"Relative Abondance Family moyenne TF.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")

Chunk pour meilleure qualité de graph

frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)


correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "Groupe"))

p <- plot_composition(frogs.data.merged, "Kingdom", "Bacteria", "Family",numberOfTaxa = 6, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Family Composition TF (6 top Family)") ## Changer le titre 

plot(p)

#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)

Boxplot Family avec stats

On va se focaliser sur quelques familles et représenter leurs abondances dans les différents groupe sous forme de boxplot. On va utiliser Chow comme groupe de référence et le comparer à tout les autres avec un test de wilcoxon pour voir desquels il diffère. Le nombre d’étoiles codent le niveau de significativité, les comparaisons non-significatives ne sont pas indiqués

## Select only some families  
families <- c("Rikenellaceae", "Bacteroidaceae", "Muribaculaceae","Lachnospiraceae", "Ruminococcaceae", "Desulfovibrionaceae")
phy <- frogs.data %>% subset_taxa(Family %in% families) %>% tax_glom(taxrank="Family") #agglomerate at family level
 
 ## Transform count to relative abundances 
depth <- sample_sums(frogs.data)[1] 
plotdata<-psmelt(phy) %>% 
  mutate(Abundance  = Abundance / depth, 
         # Family = factor(Family, levels = families),  ## set family order to the one specified in `families` rather than the alphabetical one. 
         Group = factor(Group, labels = c("A", "B","C", "D")))  ## change treatment names to the paper ones
  
p <- ggplot(plotdata,aes(x = Group,y=Abundance, color = Group, Group = Group)) + 
  stat_boxplot(geom = "errorbar", width = 0.5) + 
  geom_boxplot(outlier.alpha = 1, 
               outlier.size = 0.8) + 
  facet_wrap(~Family, scales = "free_y", ncol = 2) + 
  #scale_color_brewer(palette = "Set3") + 
  
  # theme_classic() + ## fond blanc
  labs(x = NULL) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 15))

## Compare reference level Chow to all other levels using a wilcoxon test and adjust p-values using the holm correction. 
p3 <- p + stat_compare_means(aes(label = ..p.signif..), method = "wilcox.test", p.adjust.method = "holm", ref.group = "Chow", hide.ns = T, 
                       label.y.npc = c(0.90),
                       size = 7, 
                       fontface = "bold")
plot(p3)

Palette Viridis

## Select only some families  
families <- c("Rikenellaceae", "Bacteroidaceae", "Muribaculaceae","Lachnospiraceae", "Ruminococcaceae", "Desulfovibrionaceae")
phy <- frogs.data %>% subset_taxa(Family %in% families) %>% tax_glom(taxrank="Family") #agglomerate at family level
 
 ## Transform count to relative abundances 
depth <- sample_sums(frogs.data)[1] 
plotdata<-psmelt(phy) %>% 
  mutate(Abundance  = Abundance / depth, 
         # Family = factor(Family, levels = families),  ## set family order to the one specified in `families` rather than the alphabetical one. 
         Group = factor(Group, labels = c("A", "B","C", "D")))  ## change treatment names to the paper ones
  
p <- ggplot(plotdata,aes(x = Group,y=Abundance, color = Group, Group = Group)) + 
  stat_boxplot(geom = "errorbar", width = 0.5) + 
  geom_boxplot(outlier.alpha = 1, 
               outlier.size = 0.8) + 
  facet_wrap(~Family, scales = "free_y", ncol = 2) + 
### Palette colours viridis 
  scale_color_viridis(discrete = TRUE, option = "D")+
  scale_fill_viridis(discrete = TRUE) +
  theme_minimal()  +
  # theme_classic() + ## fond blanc
  labs(x = NULL) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 15))

## Compare reference level Chow to all other levels using a wilcoxon test and adjust p-values using the holm correction. 
p <- p + stat_compare_means(aes(label = ..p.signif..), method = "wilcox.test", p.adjust.method = "holm", ref.group = "Chow", hide.ns = T, 
                       label.y.npc = c(0.90),
                       size = 7, 
                       fontface = "bold")
plot(p)

frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)

correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "diet"))

my.pal <- c(RColorBrewer::brewer.pal(n = 12, name = "Paired"), "gray")

p <- plot_composition(frogs.data.merged, "Kingdom", "Bacteria", "Family",numberOfTaxa = 12, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1) + 
  scale_fill_manual(values = my.pal) + 
  scale_color_manual(values = my.pal) +
      scale_y_continuous(label = scales::percent) 
  #theme_bw()
  ## scale_fill_brewer(palette="Paired") + 
  ## scale_colour_brewer(palette="Paired") +
  
plot(p)

#ggsave(p, file = "C:/Users/mmonnoye/Documents/Family Composition.png", width = 6, height = 4)

Les 4 phylums d’intérêts sont les Bacteroidetes, les Firmicutes, les Proteobacteria et les Tenericutes.

On va zoomer au niveau Famille dans chacun de ces phylums.

2.1.0.1 Bacteroidetes

correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)

p <- plot_composition(frogs.data, "Phylum", "Bacteroidetes", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
#ggplotly(p)
plot(p)

frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilis? pour fusionner les ?chantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)

correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)

p <- plot_composition(frogs.data.merged, "Phylum", "Bacteroidetes", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Bacteroidetes Composition TF") ## Changer le titre 
plot(p)

#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)

Autre echelle de couleur

frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilis? pour fusionner les ?chantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)

correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)

p <- plot_composition(frogs.data.merged, "Phylum", "Bacteroidetes", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Bacteroidetes Composition ")+
      scale_y_continuous(label = scales::percent) + scale_color_paletteer_d("pals::glasbey")+
  scale_fill_paletteer_d("pals::glasbey") 

plot(p)

frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)

p <- plot_composition(frogs.data.merged, "Family", "Bacteroidaceae", "Genus",numberOfTaxa = 14, fill = "Genus")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Bacteroidaceae Composition ")+
      scale_y_continuous(label = scales::percent) + scale_color_paletteer_d("pals::glasbey")+
  scale_fill_paletteer_d("pals::glasbey") ## Changer le titre 

plot(p)

#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)
#ggsave(p, file = "Lactobacillaceae Composition .png",width = 9, height = 7,dpi=300)
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)

p <- plot_composition(frogs.data.merged, "Genus", "Bacteroides", "Species",numberOfTaxa = 14, fill = "Species")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Bacteroides Composition ")+
      scale_y_continuous(label = scales::percent) + scale_color_paletteer_d("pals::glasbey")+
  scale_fill_paletteer_d("pals::glasbey") ## Changer le titre 

plot(p)

#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)
#ggsave(p, file = "Ruminococcus 1 Composition .png",width = 8, height = 7,dpi=300)

2.1.0.2 Firmicutes

correct.order <- c( "A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)

p <- plot_composition(frogs.data, "Phylum", "Firmicutes", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
#ggplotly(p)
plot(p)

frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilis? pour fusionner les ?chantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)

correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)

p <- plot_composition(frogs.data.merged, "Phylum", "Firmicutes", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Firmicutes Composition TF") ## Changer le titre 
plot(p)

#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)

2.1.0.3 Proteobacteria

correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)

p <- plot_composition(frogs.data, "Phylum", "Proteobacteria", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)

plot(p)

frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilis? pour fusionner les ?chantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)

correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)

p <- plot_composition(frogs.data.merged, "Phylum", "Proteobacteria", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Proteobacteria Composition TF") ## Changer le titre 
plot(p)

#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)

3 Diversité \(\alpha\)

3.1 Courbes de raréfaction

Comme ces distances sont sensibles à la profondeur d’échantillonnage, on va raréfier les échantillons avant de calculer les distances. Normalisation par rarefaction: même nombre de séquences pour chaque échantillon.

frogs.data.rare<-rarefy_even_depth(frogs.data,rngseed=20170329)
sample_sums(frogs.data.rare)[1:5]
## TF_25 TF_26 TF_27 TF_31 TF_32 
##  5692  5692  5692  5692  5692

Avant de calculer les diversités \(\alpha\), on va faire des courbes de raréfaction pour vérifier si on a saturé la richesse sous-dominante (i.e. celle qui passe les filtres d’abondances).

Infos Richesse: nombre d’OTUs par échantillon représenté par les courbes de raréfaction

Principe courbes de rarefaction : Compter le nombre d’OTU pour un ensemble de sous-échantillon à différents intervalles de profondeur (x= nombre de séquences et y= nombre d’OTU)

#Rarefaction curves Regime (for observed richness)
#correct.order <- c("Avant_Stress", "Milieu_Stress", "Fin_Stress")
#sample_data(frogs.data.merged)$Groupe <- factor(sample_data(frogs.data.merged)$Groupe,
#levels = correct.order)
family.palette<- c( "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")

p <- ggrare(frogs.data, step = 100, color = "Group", plot = FALSE)
## rarefying sample TF_25
## rarefying sample TF_26
## rarefying sample TF_27
## rarefying sample TF_31
## rarefying sample TF_32
## rarefying sample TF_33
## rarefying sample TF_37
## rarefying sample TF_38
## rarefying sample TF_39
## rarefying sample TF_43
## rarefying sample TF_44
## rarefying sample TF_45
## rarefying sample TF_49
## rarefying sample TF_50
## rarefying sample TF_51
## rarefying sample TF_55
## rarefying sample TF_56
## rarefying sample TF_57
## rarefying sample TF_61
## rarefying sample TF_62
## rarefying sample TF_63
## rarefying sample TF_67
## rarefying sample TF_68
## rarefying sample TF_69
rare.level <- min(sample_sums(frogs.data))
p <- p + facet_wrap(~Group) + geom_vline(xintercept = rare.level, color = "Gray60")+
  xlab("Sample Size (nb de séquences)") + ylab("Species Richness (nb d'OTU)")+ scale_color_manual(values = family.palette)+ scale_fill_manual(breaks = c( "A","B", "C", "D"), 
                       values=c("#09C51F","#8BE9F9", "#0995C5", "#094DC5"))
#ggplotly(p)
plot(p)

3.2 Effet du régime sur la diversité

#correct.order <- c("Avant_Stress", "Milieu_Stress", "Fin_Stress")
#sample_data(frogs.data.rare)$Groupe <- factor(sample_data(frogs.data.rare)$Groupe,
#levels = correct.order)

# family.palette<- c( 
#                    "A"           = "#09C51F", 
#                    "B"          = "#8BE9F9", 
#                    "C"           = "#0995C5", 
#                    "D"          = "#094DC5")

p <- plot_richness(frogs.data.rare, color = "Group",measures = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher"),x = "Group", title = "Alpha diversity Comparaison microbiote") +theme_bw() + geom_boxplot(aes(fill = Group)) +  geom_point() + theme(axis.text.x = element_blank()) 
# + scale_color_manual(values = family.palette)+ scale_fill_manual(breaks = c("A","B", "C", "D"), 
 #                      values=c("#09C51F","#8BE9F9", "#0995C5", "#094DC5"))

#ggplotly(p)
plot(p)

3.3 Richness Table

richness.table <- estimate_richness(frogs.data.rare, measures = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher"))
richness.table <- cbind(richness.table, sample_data(frogs.data.rare))
richness.table$Depth <- sample_sums(frogs.data.rare)
#knitr::kable(richness.table)
richness.table
##       Observed    Chao1  se.chao1      ACE   se.ACE  Shannon   Simpson
## TF_25      125 138.9091  8.684721 137.1734 5.781133 3.235785 0.8865241
## TF_26      110 122.0000  7.960353 119.8046 5.359846 2.986277 0.8953327
## TF_27       98 125.1429 16.502556 113.9737 5.308976 2.931824 0.8910912
## TF_31      110 127.1000 10.410991 124.0407 5.585664 3.183385 0.9126987
## TF_32      145 174.5455 15.534677 163.3709 6.319929 3.555453 0.9399338
## TF_33      123 131.5714  5.748565 132.6944 5.652564 3.460261 0.9358264
## TF_37      155 171.8667  9.289624 167.9237 6.271825 3.572851 0.9429965
## TF_38      153 162.0667  5.894332 160.7415 6.148528 3.746127 0.9586345
## TF_39      157 173.8667  9.289637 171.0081 6.445127 3.790271 0.9593817
## TF_43      149 162.6364  7.259506 164.8707 6.189332 3.440639 0.9347040
## TF_44      136 155.4615 10.733898 150.3924 5.942000 3.308821 0.9199082
## TF_45      131 137.5000  4.881065 136.8566 5.692891 3.347422 0.9281925
## TF_49      150 155.2174  3.689699 158.3837 6.135304 3.626107 0.9521367
## TF_50      151 167.8667  9.289597 163.6842 6.180999 3.582712 0.9484626
## TF_51      145 152.7727  4.881132 156.5957 6.153296 3.546282 0.9448029
## TF_55      132 141.2308  6.165558 140.2350 5.772755 3.328202 0.9211704
## TF_56      126 139.1250  7.594515 138.5742 5.618986 3.030536 0.9155367
## TF_57      130 135.2174  3.689656 139.6014 5.802700 3.108889 0.9085717
## TF_61      109 130.1111 12.606579 123.3349 5.526767 3.061096 0.9086255
## TF_62      128 139.7692  7.362383 138.5076 5.706652 3.566590 0.9506657
## TF_63      124 131.0000  4.904939 132.6817 5.651270 3.475671 0.9442988
## TF_67      147 172.0000 13.300239 164.0753 6.343085 3.598094 0.9506249
## TF_68      136 147.4000  6.955166 146.1642 5.833108 3.309084 0.9185663
## TF_69      127 139.1579  6.905637 144.7155 6.039969 3.059149 0.8764162
##       InvSimpson   Fisher  Name Reads Group Time_point Groupe Depth
## TF_25   8.812444 22.59075 TF_25 34511     A         TF   TF_A  5692
## TF_26   9.554081 19.33860 TF_26 36503     A         TF   TF_A  5692
## TF_27   9.181993 16.81721 TF_27 42363     A         TF   TF_A  5692
## TF_31  11.454579 19.33860 TF_31 33042     A         TF   TF_A  5692
## TF_32  16.648304 27.09080 TF_32 42068     A         TF   TF_A  5692
## TF_33  15.582730 22.15089 TF_33 33480     A         TF   TF_A  5692
## TF_37  17.542789 29.40780 TF_37 40689     B         TF   TF_B  5692
## TF_38  24.174757 28.94092 TF_38 28018     B         TF   TF_B  5692
## TF_39  24.619459 29.87640 TF_39 37592     B         TF   TF_B  5692
## TF_43  15.314875 28.01236 TF_43 32693     B         TF   TF_B  5692
## TF_44  12.485679 25.04326 TF_44 35838     B         TF   TF_B  5692
## TF_45  13.926130 23.92156 TF_45 30030     B         TF   TF_B  5692
## TF_49  20.892815 28.24385 TF_49 26887     C         TF   TF_C  5692
## TF_50  19.403376 28.47577 TF_50 30963     C         TF   TF_C  5692
## TF_51  18.116887 27.09080 TF_51 33423     C         TF   TF_C  5692
## TF_55  12.685588 24.14499 TF_55 32956     C         TF   TF_C  5692
## TF_56  11.839457 22.81139 TF_56 34997     C         TF   TF_C  5692
## TF_57  10.937536 23.69860 TF_57 29318     C         TF   TF_C  5692
## TF_61  10.943972 19.12570 TF_61 36616     D         TF   TF_D  5692
## TF_62  20.269889 23.25406 TF_62 32423     D         TF   TF_D  5692
## TF_63  17.952931 22.37059 TF_63 29127     D         TF   TF_D  5692
## TF_67  20.253138 27.55070 TF_67 30057     D         TF   TF_D  5692
## TF_68  12.279925 25.04326 TF_68 32306     D         TF   TF_D  5692
## TF_69   8.091673 23.03249 TF_69 34686     D         TF   TF_D  5692
#write.xlsx(richness.table,"Richness table T11.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")
#write.table(richness.table,"Richness_table_Rouen.csv",row.names=FALSE, sep="\t",dec=",",na=" ")

TABLEAU CI DESSUS AU FORMAT EXCEL DANS LE FICHIER “TABLEAU DE VALEURS” JOINT AVEC CE DOCUMENT

3.4 Statistiques sur la diversité

3.4.0.1 Statistiques sur Richness Observed

3.4.0.1.1 Anova sur la Richness Observed

Analyse de la variance, compare les moyennes d’échantillons.

div_data <- cbind(estimate_richness(frogs.data.rare, measures = "Observed"),  
                  sample_data(frogs.data.rare))
model <- aov(Observed ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
## 
## Response: Observed
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Group      4 428613  107153  647.09 < 2.2e-16 ***
## Residuals 20   3312     166                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## impact of Groupe and Time_point on richness after correction for depth / seulement possible si T0 et Tf
#observed.aov <- aov(Observed ~ Depth + Groupe + Time_point, data = richness.table)
#anova(observed.aov)                  

Df: nombre de degrés de liberté Sum Sq: inertie inter et intra respectivement Mean Sq: c’est la variance, obtenue en faisant le quotient de la 2eme colonne par la 1ere (moyenne des inerties) F value: obtenue en faisant quotient des 2 valeurs trouvées dans la 3eme colonne Pr(>F): pvalue

3.4.0.1.2 Coefficient Richness Observed

coef function: permet d’extraire que les coefficients estimés, mesure l’écart des groupes à la moyenne. Groupes équilibrés si les valeurs sont du même ordre de grandeur.

coef(model)
##   GroupA   GroupB   GroupC   GroupD 
## 118.5000 146.8333 139.0000 128.5000
3.4.0.1.3 Test de comparaisons multiples sur Richness Observed

Utilisé pour déterminer les différences significatives entre les moyennes de groupes dans une analyse de variance. Pour étudier les différences inter-groupes, permet de distinguer parmi les éechantillons s’il y en a qui différent significativement des autres.

TukeyHSD(model)     
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Observed ~ 0 + Group, data = div_data)
## 
## $Group
##           diff         lwr       upr     p adj
## B-A  28.333333   7.5386839 49.127983 0.0055144
## C-A  20.500000  -0.2946494 41.294649 0.0542051
## D-A  10.000000 -10.7946494 30.794649 0.5458805
## C-B  -7.833333 -28.6279827 12.961316 0.7202204
## D-B -18.333333 -39.1279827  2.461316 0.0962765
## D-C -10.500000 -31.2946494 10.294649 0.5059833

diff: différences entre les moyennes obséervées lwr et upr: donnent les bornes inférieure et supérieur de l’intervalle p adj: pvalue après ajustement pour les comparaisons multiples

comparison_data <- compare_means(
  Observed ~ Group,
  data = div_data, 
  method = "wilcox.test"
  ) %>% filter(p.adj <= 0.05)
comparison_data
## # A tibble: 0 x 8
## # … with 8 variables: .y. <chr>, group1 <chr>, group2 <chr>, p <dbl>,
## #   p.adj <dbl>, p.format <chr>, p.signif <chr>, method <chr>
# comparison_data <- compare_means(
#   Observed ~ Group,
#   data = div_data, 
#   method = "wilcox.test"
#   ) %>% filter(p.adj <= 0.05) %>% 
#   mutate(p.signif = symnum(
#     p.adj, 
#     cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), 
#     symbols = c("****", "***", "**", "*", "ns"))) %>% 
#   mutate(y = c(185, 140, 160, 170, 165), 
#          x = c(1.5, 2, 3.5, 3, 3.5))
# 
# ggplot(div_data, aes(x = Group, y = Observed)) + 
#   geom_point() + 
#   geom_boxplot(aes(fill = Group)) + 
#   geom_segment(data = comparison_data, 
#                aes(x = group1, xend = group2, y = y, yend = y)) + 
#   geom_text(data = comparison_data, 
#                aes(x = x, y = y, label = p.signif), nudge_y =1 ) #nudge_y=1 retrait etoile par rapport a la barre 

3.4.0.2 Statistiques sur Richness Chao1

3.4.0.2.1 Anova sur la Richness Chao1
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "Chao1"),  
                  sample_data(frogs.data.rare))
model <- aov(Chao1 ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
## 
## Response: Chao1
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Group      4 522103  130526  552.46 < 2.2e-16 ***
## Residuals 20   4725     236                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3.4.0.2.2 Coefficient Chao1
coef(model)
##   GroupA   GroupB   GroupC   GroupD 
## 136.5448 160.5663 148.5717 143.2397
3.4.0.2.3 Test de comparaisons multiples sur Richness Chao1
TukeyHSD(model)     
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Chao1 ~ 0 + Group, data = div_data)
## 
## $Group
##           diff         lwr       upr     p adj
## B-A  24.021512  -0.8173599 48.860384 0.0602594
## C-A  12.026852 -12.8120193 36.865724 0.5403591
## D-A   6.694901 -18.1439708 31.533773 0.8737170
## C-B -11.994659 -36.8335311 12.844212 0.5425251
## D-B -17.326611 -42.1654827  7.512261 0.2388774
## D-C  -5.331952 -30.1708233 19.506920 0.9306014
comparison_data <- compare_means(
  Chao1 ~ Group,
  data = div_data, 
  method = "wilcox.test"
  ) %>% filter(p.adj <= 0.05)
comparison_data
## # A tibble: 0 x 8
## # … with 8 variables: .y. <chr>, group1 <chr>, group2 <chr>, p <dbl>,
## #   p.adj <dbl>, p.format <chr>, p.signif <chr>, method <chr>
# comparison_data <- compare_means(
#   Chao1 ~ Group,
#   data = div_data, 
#   method = "wilcox.test"
#   ) %>% filter(p.adj <= 0.05) %>% 
#   mutate(p.signif = symnum(
#     p.adj, 
#     cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), 
#     symbols = c("****", "***", "**", "*", "ns"))) %>% 
#   mutate(y = c(200, 160, 180), 
#          x = c(1.5, 2, 3.5))
# 
# ggplot(div_data, aes(x = Group, y = Chao1)) + 
#   geom_point() + 
#   geom_boxplot(aes(fill = Group)) + 
#   geom_segment(data = comparison_data, 
#                aes(x = group1, xend = group2, y = y, yend = y)) + 
#   geom_text(data = comparison_data, 
#                aes(x = x, y = y, label = p.signif), nudge_y =1 ) #nudge_y=1 retrait etoile par rapport a la barre 

3.4.0.3 Statistiques sur Richness ACE

#####Anova sur la Richness ACE

div_data <- cbind(estimate_richness(frogs.data.rare, measures = "ACE"),  
                  sample_data(frogs.data.rare))
model <- aov(ACE ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
## 
## Response: ACE
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Group      4 509673  127418  642.68 < 2.2e-16 ***
## Residuals 20   3965     198                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3.4.0.3.1 Coefficient ACE
coef(model)
##   GroupA   GroupB   GroupC   GroupD 
## 131.8430 158.6322 149.5124 141.5799
3.4.0.3.2 Test de comparaisons multiples sur Richness ACE
TukeyHSD(model)     
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = ACE ~ 0 + Group, data = div_data)
## 
## $Group
##           diff        lwr       upr     p adj
## B-A  26.789200   4.035502 49.542897 0.0174674
## C-A  17.669409  -5.084288 40.423107 0.1648824
## D-A   9.736900 -13.016797 32.490598 0.6352594
## C-B  -9.119790 -31.873488 13.633907 0.6807042
## D-B -17.052299 -39.805997  5.701398 0.1879308
## D-C  -7.932509 -30.686207 14.821188 0.7645540
comparison_data <- compare_means(
  ACE ~ Group,
  data = div_data, 
  method = "wilcox.test"
  ) %>% filter(p.adj <= 0.05)
comparison_data
## # A tibble: 0 x 8
## # … with 8 variables: .y. <chr>, group1 <chr>, group2 <chr>, p <dbl>,
## #   p.adj <dbl>, p.format <chr>, p.signif <chr>, method <chr>
# comparison_data <- compare_means(
#   ACE ~ Group,
#   data = div_data, 
#   method = "wilcox.test"
#   ) %>% filter(p.adj <= 0.05) %>% 
#   mutate(p.signif = symnum(
#     p.adj, 
#     cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), 
#     symbols = c("****", "***", "**", "*", "ns"))) %>% 
#   mutate(y = c(200, 160, 180), 
#          x = c(1.5, 2, 3.5))
# 
# ggplot(div_data, aes(x = Group, y = ACE)) + 
#   geom_point() + 
#   geom_boxplot(aes(fill = Group)) + 
#   geom_segment(data = comparison_data, 
#                aes(x = group1, xend = group2, y = y, yend = y)) + 
#   geom_text(data = comparison_data, 
#                aes(x = x, y = y, label = p.signif), nudge_y =1 ) #nudge_y=1 retrait etoile par rapport a la barre 

3.4.0.4 Statistiques sur Richness shannon

3.4.0.4.1 Anova sur la Richness Shannon
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "Shannon"),  
                  sample_data(frogs.data.rare))
model <- aov(Shannon ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
## 
## Response: Shannon
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## Group      4 272.665  68.166  1199.4 < 2.2e-16 ***
## Residuals 20   1.137   0.057                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3.4.0.4.2 Coefficient Shannon
coef(model)
##   GroupA   GroupB   GroupC   GroupD 
## 3.225497 3.534355 3.370455 3.344948
3.4.0.4.3 Test de comparaisons multiples sur Richness Shannon
TukeyHSD(model)     
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon ~ 0 + Group, data = div_data)
## 
## $Group
##            diff        lwr       upr     p adj
## B-A  0.30885758 -0.0763832 0.6940984 0.1455726
## C-A  0.14495720 -0.2402836 0.5301980 0.7209027
## D-A  0.11945026 -0.2657905 0.5046910 0.8211981
## C-B -0.16390038 -0.5491412 0.2213404 0.6394364
## D-B -0.18940732 -0.5746481 0.1958335 0.5279221
## D-C -0.02550695 -0.4107477 0.3597338 0.9976643
comparison_data <- compare_means(
  Shannon ~ Group,
  data = div_data, 
  method = "wilcox.test"
  ) %>% filter(p.adj <= 0.05)
comparison_data
## # A tibble: 0 x 8
## # … with 8 variables: .y. <chr>, group1 <chr>, group2 <chr>, p <dbl>,
## #   p.adj <dbl>, p.format <chr>, p.signif <chr>, method <chr>
# comparison_data <- compare_means(
#   Shannon ~ Group,
#   data = div_data, 
#   method = "wilcox.test"
#   ) %>% filter(p.adj <= 0.05) %>% 
#   mutate(p.signif = symnum(
#     p.adj, 
#     cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), 
#     symbols = c("****", "***", "**", "*", "ns"))) %>% 
#   mutate(y = c(4, 4.5, 4.3,4.2), 
#          x = c(1.5, 2, 3.5,4))
# 
# ggplot(div_data, aes(x = Group, y = Shannon)) + 
#   geom_point() + 
#   geom_boxplot(aes(fill = Group)) + 
#   geom_segment(data = comparison_data, 
#                aes(x = group1, xend = group2, y = y, yend = y)) + 
#   geom_text(data = comparison_data, 
#                aes(x = x, y = y, label = p.signif), nudge_y =1 ) #nudge_y=1 retrait etoile par rapport a la barre 

3.4.0.5 Statistiques sur Richness Simpson

3.4.0.5.1 Anova sur la Richness Simpson
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "Simpson"),  
                  sample_data(frogs.data.rare))
model <- aov(Simpson ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
## 
## Response: Simpson
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## Group      4 20.6215  5.1554   10179 < 2.2e-16 ***
## Residuals 20  0.0101  0.0005                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3.4.0.5.2 Coefficient Simpson
coef(model)
##    GroupA    GroupB    GroupC    GroupD 
## 0.9102345 0.9406363 0.9317801 0.9248662
3.4.0.5.3 Test de comparaisons multiples sur Richness Simpson
TukeyHSD(model)     
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Simpson ~ 0 + Group, data = div_data)
## 
## $Group
##             diff          lwr        upr     p adj
## B-A  0.030401786 -0.005965199 0.06676877 0.1223289
## C-A  0.021545673 -0.014821312 0.05791266 0.3706634
## D-A  0.014631758 -0.021735227 0.05099874 0.6781601
## C-B -0.008856113 -0.045223098 0.02751087 0.9028683
## D-B -0.015770028 -0.052137013 0.02059696 0.6256180
## D-C -0.006913915 -0.043280900 0.02945307 0.9502164

3.4.0.6 Statistiques sur Richness InvSimpson

3.4.0.6.1 Anova sur la Richness InvSimpson
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "InvSimpson"),  
                  sample_data(frogs.data.rare))
model <- aov(InvSimpson ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
## 
## Response: InvSimpson
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Group      4 5604.5  1401.1  66.103 3.045e-11 ***
## Residuals 20  423.9    21.2                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3.4.0.6.2 Coefficient InvSimpson
coef(model)
##   GroupA   GroupB   GroupC   GroupD 
## 11.87236 18.01061 15.64594 14.96525
3.4.0.6.3 Test de comparaisons multiples sur Richness InvSimpson
TukeyHSD(model)     
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = InvSimpson ~ 0 + Group, data = div_data)
## 
## $Group
##           diff        lwr       upr     p adj
## B-A  6.1382598  -1.301546 13.578065 0.1293742
## C-A  3.7735880  -3.666218 11.213394 0.5022458
## D-A  3.0928996  -4.346906 10.532705 0.6557963
## C-B -2.3646719  -9.804477  5.075134 0.8102475
## D-B -3.0453602 -10.485166  4.394445 0.6664972
## D-C -0.6806883  -8.120494  6.759117 0.9939189

3.4.0.7 Statistiques sur Richness Fisher

3.4.0.7.1 Anova sur la Richness Fisher
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "Fisher"),  
                  sample_data(frogs.data.rare))
model <- aov(Fisher ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
## 
## Response: Fisher
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## Group      4 14511.5  3627.9  441.31 < 2.2e-16 ***
## Residuals 20   164.4     8.2                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3.4.0.7.2 Coefficient Fisher
coef(model)
##   GroupA   GroupB   GroupC   GroupD 
## 21.22114 27.53372 25.74423 23.39613
3.4.0.7.3 Test de comparaisons multiples sur Richness Fisher
TukeyHSD(model)     
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Fisher ~ 0 + Group, data = div_data)
## 
## $Group
##          diff        lwr        upr     p adj
## B-A  6.312576  1.6792872 10.9458651 0.0055175
## C-A  4.523089 -0.1101996  9.1563784 0.0572392
## D-A  2.174992 -2.4582975  6.8082805 0.5651326
## C-B -1.789487 -6.4227757  2.8438022 0.7047377
## D-B -4.137585 -8.7708736  0.4957043 0.0905876
## D-C -2.348098 -6.9813868  2.2851911 0.5029462

4 Diversité \(\beta\)

Diversité entre les communautés, comparaison des échantillons entre eux / les communautés entre elles.

  • Bray-Curtis (dissimilarité) / Info abondance

Bray curtis nous permet de visualiser la composition, l’abondance, la dominance, etc., de populations ou de communautés dans l’écosystème et peut être utilisé pour obtenir un résultat du degré de dissemblance entre les espèces. L’indice de Bray-Curtis donne le même poids aux différences d’abondances observées pour les espèces rares que pour les espèces importantes. Si =1 cela signifie que les communautés sont totalement différentes.

  • Jaccard (similarité) / Info présence - absence

Cet indice va de 0 (les deux échantillons partagent les mêmes OTUs) à 1 (les deux échantillons n’ont aucun OTU en commun). Exemple: if 30% of taxa are in both samples, this means 70% are only found in one sample, and the Jaccard distance is 0.7

  • Unifrac (phylogénie) prend en compte arbre phylogénique et l’abondance : la distance UniFrac entre deux échantillons se base sur les longueurs des branches de l’arbre partagées entre ces deux échantillons. Unifrac = Nb de branches contenant des communautés identiques / Nb total de branches. Si unifrac= 0 Branches partagées Si unfrac= 1 Pas de branches partagées

  • Weighted UniFrac (pondéré) intègre les abondances lors du calcul des longueurs de branches partagées / non partagées pour calculer la distance, de sorte que l’impact des caractéristiques de faible abondance est diminué.

UniFrac pondéré est utile pour examiner les différences dans la structure de la communauté; UniFrac non pondéré est plus sensible aux différences de caractéristiques de faible abondance. Les deux sont donc utiles à interpréter ensemble et il y a des cas où les deux, un seul ou les deux peuvent produire des différences significatives entre les groupes expérimentaux.

4.1 Calcul des distances

4.1.0.1 Matrice Bray Curtis

SampleOrder <- levels(reorder(sample_names(frogs.data.rare), as.numeric(get_variable(frogs.data.rare, "Group"))))
dist.bc <- distance(frogs.data.rare, "bray")
plot_dist_as_heatmap(dist.bc, order = SampleOrder,title="Bray Distances", show.names = T)

4.1.0.2 Matrice Jaccard

SampleOrder <- levels(reorder(sample_names(frogs.data.rare), as.numeric(get_variable(frogs.data.rare, "Group"))))
dist.jac <- distance(frogs.data.rare, "cc")
plot_dist_as_heatmap(dist.jac, order = SampleOrder,title="Jaccard Distances", show.names = T)

4.1.0.3 Matrice unifrac

SampleOrder <- levels(reorder(sample_names(frogs.data.rare), as.numeric(get_variable(frogs.data.rare, "Group"))))
dist.uf <- distance(frogs.data.rare, "unifrac")
plot_dist_as_heatmap(dist.uf, order = SampleOrder,title="Unifrac Distances", show.names = T)

4.1.0.4 Matrice Weighted Unifrac

SampleOrder <- levels(reorder(sample_names(frogs.data.rare), as.numeric(get_variable(frogs.data.rare, "Group"))))
dist.wuf <- distance(frogs.data.rare, "wunifrac")
plot_dist_as_heatmap(dist.wuf, order = SampleOrder,title="Weighted Unifrac Distances", show.names = T)

4.2 Ordination des échantillons

On va faire une MDS (non-parametric multi-dimensional scaling) aussi appelé PCoA pour avoir une représentation en 2D de la distance entre tous les échantillons. Permet de visusaliser les similitudes entre les groupes.

La mise à l’échelle multidimensionnelle (MDS) est une approche populaire pour représenter graphiquement les relations entre des objets (par exemple, des tracés ou des échantillons) dans un espace multidimensionnel.

4.2.0.1 Plot ordination Bray Curtis

ord <- ordinate(frogs.data.rare, method = "MDS", distance = "bray")
family.palette<- c(
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")

#Si on veut non des groupes sur ellipses:
#p <- plot_samples(frogs.data.rare, ord, color = "Groupe",shape="Groupe")

p <- plot_ordination(frogs.data.rare, ord, color = "Group",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Bray Curtis")+ scale_color_manual(values = family.palette)
p <- p + stat_ellipse(aes(group = Groupe))+geom_point(size=2)
#ggplotly(p)
plot(p)

Legende dans le graph et pvalue affichée

ord <- ordinate(frogs.data.rare, method = "MDS", distance = "bray")
family.palette<- c( 
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")

p <- plot_ordination(frogs.data.rare, ord, color = "Group",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Bray Curtis")+ scale_color_manual(values = family.palette)
p2 <- p + stat_ellipse(aes(group = Group),size=1) + #taille ellipses
                           geom_point(size=2.5)+ #taille points
  theme(legend.position = c(0.6,0.25)) + #place legende
  theme(  legend.title = element_text(color = "black", size = 10),#couleur et taille legende
  legend.text = element_text( size = 10)) +
  #Ajouter pval sur graph
    annotate("text", x = -0.35, y = -0.6, label = "p = 0.001 ***")
plot(p2)

#p2 <- p2 + theme(legend.position = c(0.05, 0.02), legend.justification = c(0, 0))
#- legend.position g?re la position dans le cadran: (0, 0) en bas ?
#gauche, (0.5, 0.5) au milieu, (0, 1) en haut ? gauche, (1, 1) en haut ? droite, etc
#- legend.justification g?re l'alignement par rapport ? la position:
#(0.5, 0.5) pour centrer la l?gende autour de legend.position, (0, 0)
#pour aligner son coin inf?rieur gauche avec legend.position, (0, 1) pour aligner son coin sup?rieur gauche, etc.

#ggsave(p, file = "/Users/NIPPONNOYE/Documents/2020 Boulot Mag/2020 Télétravail/Teletravail_20201001/Bray.eps", width = 10, height = 7)

Combo avec 3 graphiques pour publi

#install.packages("gridExtra")
library("gridExtra")
#install.packages("cowplot")
library("cowplot")
pcombo<-ggdraw() +
  draw_plot(p1, 0, .5, 1, .5) +
  draw_plot(p2, 0, 0, .5, .5) +
  draw_plot(p3, .5, 0, .5, .5) +
  draw_plot_label(c("A", "B", "C"), c(0, 0, 0.5), c(1, 0.5, 0.5), size = 20)+panel_border()
plot(pcombo)

#ggsave(p, file = "C:/Users/mmonnoye/Documents/figure_publi_10x10.tiff", width = 10, height = 10)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/figure_publi_10x10.png", width = 10, height = 10)

Nom échantillon sur points

ord <- ordinate(frogs.data.rare, method = "MDS", distance = "bray")
family.palette<- c(
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")

p <- plot_ordination(frogs.data.rare, ord, color = "Group",label="Name",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Bray Curtis")+ geom_point(size = 2)+ scale_color_manual(values = family.palette)
#ggplotly(p)
plot(p)

#script pour ajuster les noms sur les points
#p = plot_ordination(run_invert, SMP.ord.invert, color = "Site", shape = "Species", label = "Run_no")
# p + theme_bw() + geom_text(mapping = aes(label = Run_no), size = 10, vjust = 1.5) theme(text = element_text(size = 16)) + geom_point(size = 4)
#ou
#p = plot_ordination(run_invert, SMP.ord.invert, color = "Site", shape = "Species", label = "Run_no")
# + geom_text(aes(label = Run_no), size = 10, vjust = 1.5)+ geom_point(size = 4)+theme_bw()

Une facette wrap par groupe

4.2.0.2 Plot ordination Jaccard

ord <- ordinate(frogs.data.rare, method = "MDS", distance = "cc")
family.palette<- c("Chow"                = "#FA6466", 
                   "HFPalm"           = "#D9AE0F", 
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Jaccard ")
p <- p + stat_ellipse(aes(group = Group))+ geom_point(size = 2)+ scale_color_manual(values = family.palette)+ facet_wrap(~Group, ncol = 6)
#ggplotly(p)
plot(p)

Couleurs de base

ord <- ordinate(frogs.data.rare, method = "MDS", distance = "cc")

p <- plot_ordination(frogs.data.rare, ord, color = "Group",label="Name",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Jaccard ")+ geom_point(size = 2)

#ggplotly(p)
plot(p)

4.2.0.3 Plot ordination Unifrac

ord <- ordinate(frogs.data.rare, method = "MDS", distance = "unifrac")
family.palette<- c( 
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Unifrac")
p <- p + stat_ellipse(aes(group = Group))+ geom_point(size = 2)+ scale_color_manual(values = family.palette)
#ggplotly(p)
plot(p)

ord <- ordinate(frogs.data.rare, method = "MDS", distance = "unifrac")
family.palette<- c(
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",label="Name",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Unifrac")+ geom_point(size = 2)+ scale_color_manual(values = family.palette)
#ggplotly(p)
plot(p)

4.2.0.4 Plot ordination Weighted Unifrac

ord <- ordinate(frogs.data.rare, method = "MDS", distance = "wUnifrac")
family.palette<- c(
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + wUnifrac")
p <- p + stat_ellipse(aes(group = Group))+ geom_point(size = 2)+ scale_color_manual(values = family.palette)
#ggplotly(p)
plot(p)

ord <- ordinate(frogs.data.rare, method = "MDS", distance = "wUnifrac")
family.palette<- c(
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",label="Name",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + wUnifrac")+ geom_point(size = 2)+ scale_color_manual(values = family.palette)
#ggplotly(p)
plot(p)

4.3 ANOVA Multivariée

Pour confirmer l’effet du régime sur les distances, on va faire une permanova. Analyse multivariée de la variance par permutations basée sur les matrices de distances.

4.3.0.1 Adonis sur la matrice Bray Curtis

adonis(dist.bc ~ Group,data = as(sample_data(frogs.data.rare), 'data.frame'))
## 
## Call:
## adonis(formula = dist.bc ~ Group, data = as(sample_data(frogs.data.rare),      "data.frame")) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)   
## Group      3   0.45015 0.150049  1.7973 0.21235   0.01 **
## Residuals 20   1.66972 0.083486         0.78765          
## Total     23   2.11986                  1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Df: degree of freedom. Sums Of Sqs: sum of squares. MeanSqs: sum of squares by degree of freedom. F: F statistics. R2: partial R-squared. Pr(>F) p-value based

4.3.0.2 Adonis sur la matrice Jaccard

adonis(dist.jac ~ Group,data = as(sample_data(frogs.data.rare), 'data.frame'))
## 
## Call:
## adonis(formula = dist.jac ~ Group, data = as(sample_data(frogs.data.rare),      "data.frame")) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)    
## Group      3   0.43359 0.144530  1.9459 0.22594  0.001 ***
## Residuals 20   1.48547 0.074274         0.77406           
## Total     23   1.91906                  1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3.0.3 Adonis sur la matrice Unifrac

adonis(dist.uf ~ Group,data = as(sample_data(frogs.data.rare), 'data.frame'))
## 
## Call:
## adonis(formula = dist.uf ~ Group, data = as(sample_data(frogs.data.rare),      "data.frame")) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)  
## Group      3   0.17031 0.056771  1.7581 0.20868  0.011 *
## Residuals 20   0.64584 0.032292         0.79132         
## Total     23   0.81615                  1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3.0.4 Adonis sur la matrice wUnifrac

adonis(dist.wuf ~ Group,data = as(sample_data(frogs.data.rare), 'data.frame'))
## 
## Call:
## adonis(formula = dist.wuf ~ Group, data = as(sample_data(frogs.data.rare),      "data.frame")) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)   
## Group      3   0.17102 0.057008   2.353 0.26087  0.009 **
## Residuals 20   0.48456 0.024228         0.73913          
## Total     23   0.65558                  1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.4 Clustering des échantillons

On peut aussi faire un clustering des échantillons pour vérifier s’ils se regroupent par groupe (ou autre).

Ward : c’est la plus courante. Elle consiste à réunir les deux clusters dont le regroupement fera le moins baisser l’inertie interclasse. C’est la distance de Ward qui est utilisée : la distance entre deux classes est celle de leurs barycentres au carré, pondérée par les effectifs des deux clusters. Cette technique tend à regrouper les petites classes entre elles.

Single : méthode du saut minimum prend en compte la plus petite distance entre A et B.

Complete : méthode du diamètre qui prend en compte la plus grande distance entre A et B.

Median : moyenne de tous les liens, qu’ils soient entre observations de deux clusters différents ou intraclasses. Cette méthode est la seule qui s’attache directement au cluster obtenu et non aux caractéristiques des clusters candidats.

Average : le logiciel mesure tous les liens entre chaque observation du cluster A et chaque observation du cluster B et en fait une moyenne. C’est une des méthodes les plus efficaces. Elle tend à réunir des clusters aux inerties faibles.

4.4.0.1 Clustering sur la matrice de Bray Curtis

#family.palette<- c("Chow"                = "#FA6466", 
#                   "HFPalm"           = "#D9AE0F", 
#                   "A"           = "#09C51F", 
#                   "C"           = "#0995C5", 
#                   "D"          = "#094DC5")

#plot_clust(frogs.data.rare, dist = dist.wuf, method = "ward.D2", color ="Group", palette = family.palette)
#plot_clust(frogs.data.rare, dist = dist.bc, method = "ward.D", color = "Group", palette = family.palette)
#plot_clust(frogs.data.rare, dist = dist.bc, method = "single", color = "Group", palette = family.palette)
#plot_clust(frogs.data.rare, dist = dist.bc, method = "complete", color = "Group", palette = family.palette)
#plot_clust(frogs.data.rare, dist = dist.bc, method = "median", color = "Group", palette = family.palette)
#plot_clust(frogs.data.rare, dist = dist.bc, method = "average", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.wuf, method = "ward.D2", color ="Group")

plot_clust(frogs.data.rare, dist = dist.bc, method = "ward.D", color = "Group")

plot_clust(frogs.data.rare, dist = dist.bc, method = "single", color = "Group")

plot_clust(frogs.data.rare, dist = dist.bc, method = "complete", color = "Group")

plot_clust(frogs.data.rare, dist = dist.bc, method = "median", color = "Group")

plot_clust(frogs.data.rare, dist = dist.bc, method = "average", color = "Group")

4.4.0.2 Clustering sur la matrice de Jaccard

plot_clust(frogs.data.rare, dist = dist.jac, method = "ward.D2", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.jac, method = "ward.D", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.jac, method = "single", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.jac, method = "complete", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.jac, method = "median", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.jac, method = "average", color = "Group", palette = family.palette)

4.4.0.3 Clustering sur la matrice Unifrac

plot_clust(frogs.data.rare, dist = dist.uf, method = "ward.D2", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.uf, method = "ward.D", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.uf, method = "single", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.uf, method = "complete", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.uf, method = "median", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.uf, method = "average", color = "Group", palette = family.palette)

4.4.0.4 Clustering sur la matrice Weighted Unifrac

plot_clust(frogs.data.rare, dist = dist.wuf, method = "ward.D", color="Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.wuf, method = "single", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.wuf, method = "complete", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.wuf, method = "median", color = "Group", palette = family.palette)

plot_clust(frogs.data.rare, dist = dist.wuf, method = "average", color = "Group", palette = family.palette)

5 PCA

L’analyse des composants principaux permet d’extraire les informations importantes d’un tableau de données multivariées et d’exprimer ces informations sous la forme d’un ensemble de quelques nouvelles variables appelées composants principaux. L’ACP va conduire à un sous espace de plus petite dimension, tel que la projection sur ce sous-espace retienne la majeure partie de l’information.

PCA sur les 50 OTU les plus représentés

family.palette<- c(
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")

#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/factoextra")
library("factoextra")

m <- as.data.frame(t(otu_table(frogs.data)))
m <- sqrt(m)
pca <- prcomp(m[colSums(m) != 0], center = TRUE, scale = TRUE)

p <- fviz_pca_biplot(pca, axes = c(1, 2), geom.ind = c("point", "text"), habillage = get_variable(frogs.data, "Group"), invisible = "quali", geom.var = c("arrow", "text"), select.var = list(contrib = 50), title = "Principal Component Analysis")
p<-p+ scale_color_manual(values = family.palette)+ scale_fill_manual(breaks = c("A","B", "C", "D"), 
                       values=c("#09C51F","#8BE9F9", "#0995C5", "#094DC5"))
plot(p + theme_bw())

6 Arbre phylogénétique

family.palette<- c(
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")
p <- plot_tree(physeq = prune_taxa(names(sort(taxa_sums(frogs.data), decreasing = TRUE)[1:20]), frogs.data), method = "sampledodge", color = "Group", shape = "Group", size = "abundance", sizebase = 5, ladderize = "left", plot.margin = 0, title = "Phylogenetic tree of the 20 most abundant OTU ")
p<-p+ scale_color_manual(values = family.palette)+ scale_fill_manual(breaks = c("A","B", "C", "D"), 
                       values=c("#09C51F","#8BE9F9", "#0995C5", "#094DC5"))
plot(p)

#Si on veut ajouter le rang taxonomique Family à la représentation:
#p <- plot_tree(physeq = prune_taxa(names(sort(taxa_sums(frogs.data), decreasing = TRUE)[1:20]), frogs.data), method = "sampledodge", color = "Groupe", shape = "Groupe", size = "abundance", label.tips = "Family", sizebase = 5, ladderize = "left", plot.margin = 0, title = "Phylogenetic tree of the 20 most abundant OTU ")
#plot(p)

7 Abondances différentielles globale à TF

DESeq : méthode de normalisation dérivées de la transcriptomique : Outils qui va chercher à estimer la variabilité biologique des différents échantillons afin de les normaliser et de calculer des ratios moyen pour chaque OTU. Il va également déterminer quels OTUs sont différentiellement exprimés en cherchant des valeurs de log2 de ratios significativement supérieures à 1 ou inférieures ? -1 et en employant pour cela le test de Wald.

Rappels: OTU / Operational Taxonomic Unit = Groupe d’individus dont les séquences 16S sont similaires à au moins 97% Abondance= nombre d’individus d’une espèce

Comparaisons entre ces 4 groupes: “A”, “B”,“C”, “D” à TF

On va chercher les OTUs différentiellement abondants entre les 6 groupes avec pval=0.05 et min.abundance=30

extract_daotus <- function(cond1, cond2, physeq, pval = 0.05) {
  res <- DESeq2::results(dds, contrast=c("Group", cond1, cond2)) %>% as.data.frame() %>% 
    mutate(OTU = taxa_names(physeq)) %>% filter(padj < pval) %>% 
    inner_join(tax_table(physeq) %>% as("matrix") %>% as_tibble(rownames = "OTU"), by = "OTU") %>%
    arrange(log2FoldChange)
  return(res)
}
build_plotdata_daotus <- function(da.otus, physeq, min.abundance = 50) {
  da.data <- rarefy_even_depth(physeq, rng = 20170329, verbose = FALSE) %>% 
    prune_taxa(da.otus$OTU, .)
  is_abundant <- function(x) { x > min.abundance }
  abundant_taxa <- genefilter_sample(da.data, is_abundant, A = 1)
  da.data <- prune_taxa(abundant_taxa, da.data)
  max.level <- apply(as(tax_table(da.data), "matrix"), 1, function(x) { 
                         y <- x[!is.na(x)]; 
                         y <- y[y != ""];  
                         y <- y[!grepl("unknown", y, ignore.case = T)]
                         y <- y[y != "Multi-affiliation"]
                         # if (names(y)[length(y)] != "Family") {
                         #     return(paste(y["Family"], y[length(y)], sep = "/"))
                         #} else {
                             return(y[length(y)])
                         # } 
                     })
otu.formatted.names <- data.frame(OTU2 = paste0(gsub(pattern = "Cluster_", "", names(max.level)), "_", max.level), 
                          OTU = names(max.level), 
                          stringsAsFactors = FALSE) %>% 
  inner_join(da.otus, by = "OTU") %>% arrange(log2FoldChange)
otu.order <- otu.formatted.names %>% pull(OTU2)
da.df <- merge(psmelt(da.data), otu.formatted.names) %>% mutate(OTU2 = factor(OTU2, levels = otu.order))
return(da.df)
}
is_present <- function(comptage) {comptage > 0} ## tester la présence d'un OTU
prevalent_taxa <- genefilter_sample(frogs.data, is_present, A = 2)
filtered.data <- prune_taxa(prevalent_taxa, frogs.data)
cds <- phyloseq_to_deseq2(filtered.data, design = ~ Group)
dds <- DESeq2::DESeq(cds)
da.otus_1 <- extract_daotus("A", "B", filtered.data, pval = 0.05)
da.otus_2 <- extract_daotus("A", "C", filtered.data, pval = 0.05)
da.otus_3 <- extract_daotus("A", "D", filtered.data, pval = 0.05)
da.otus_4 <- extract_daotus("B", "C", filtered.data, pval = 0.05)
da.otus_5 <- extract_daotus("B", "D", filtered.data, pval = 0.05)
da.otus_6 <- extract_daotus("C", "D", filtered.data, pval = 0.05)

da.otus <- bind_rows(da.otus_1,
                     da.otus_2,da.otus_3,
                     da.otus_4,da.otus_5,
                     da.otus_6) %>% 
  filter(!duplicated(OTU))

#Filtrer OTU avec pval<0,01
#da.otu<-subset(da.otus,padj<0.01)
#dim(da.otus)

Diagramme de Venn représentant la comparaison des OTUs differentiellement abondants

# da.otus_1 <- extract_daotus("A", "B", filtered.data, pval = 0.05)
# da.otus_2 <- extract_daotus("A", "C", filtered.data, pval = 0.05)
# da.otus_3 <- extract_daotus("A", "D", filtered.data, pval = 0.05)
# da.otus_4 <- extract_daotus("B", "C", filtered.data, pval = 0.05)
# da.otus_5 <- extract_daotus("B", "D", filtered.data, pval = 0.05)
# da.otus_6 <- extract_daotus("C", "D", filtered.data, pval = 0.05)
# da.otus <- bind_rows(da.otus_1,
#                      da.otus_2,
#                      da.otus_3,
#                      da.otus_4,
#                      da.otus_5,
#                      da.otus_6) %>%
#   filter(!duplicated(OTU))
# 
# venn.plot <- venn.diagram(list("A/B"  = da.otus_1$OTU,
#                                 "A/C" = da.otus_2$OTU,
#                                "A/D" = da.otus_3$OTU,
#                           "B/C"  = da.otus_4$OTU,
#                                 "B/D" = da.otus_5$OTU,
#                                "C/D" = da.otus_6$OTU),
#                           filename = NULL,cex = 3, cat.cex = 2)
# grid.newpage(); grid.draw(venn.plot)
#ggsave(venn.plot, file = "C:/Users/mmonnoye/Documents/Biom/Odorants/venn_plot.png", width = 21, height = 13)

7.0.0.1 OTUs qui passent les filtres avec pval=0.05

res <- inner_join(da.otus, 
                  ## Transformer la table taxonomique en tableau avec une colonne OTU
                  tax_table(frogs.data) %>% as("matrix") %>% as_tibble(rownames = "OTU"), 
                  by = "OTU")

res
##      baseMean log2FoldChange     lfcSE      stat       pvalue         padj
## 1   59.208946      -9.185524 1.6709031 -5.497341 3.855611e-08 2.866004e-06
## 2   14.706752      -8.166348 2.1461900 -3.805044 1.417788e-04 3.021270e-03
## 3   13.900219      -6.898993 1.8446423 -3.740016 1.840082e-04 3.419486e-03
## 4    3.015104      -5.636569 1.2413992 -4.540497 5.612182e-06 3.128791e-04
## 5    8.076004      -5.073970 1.2781357 -3.969821 7.192677e-05 2.004959e-03
## 6   43.483700      -4.919280 1.2970466 -3.792678 1.490313e-04 3.021270e-03
## 7    5.403261      -4.538722 1.1740815 -3.865764 1.107421e-04 2.743943e-03
## 8    1.955552      -4.417891 1.2602907 -3.505454 4.558287e-04 7.819215e-03
## 9   94.281842      -4.392355 0.7552902 -5.815454 6.046960e-09 6.742360e-07
## 10   5.482008      -4.063610 1.0174795 -3.993801 6.502248e-05 2.004959e-03
## 11 375.456923      -3.959179 0.5670440 -6.982137 2.907239e-12 6.483142e-10
## 12   1.985743      -3.793471 1.2407648 -3.057365 2.232922e-03 3.556726e-02
## 13  15.309849      -3.674789 0.9010442 -4.078368 4.535301e-05 1.685620e-03
## 14   9.993880      -3.062343 0.7160035 -4.276995 1.894329e-05 8.448706e-04
## 15  12.316053       1.633146 0.5418508  3.014014 2.578160e-03 3.832865e-02
## 16   6.218214      -6.756366 2.0438980 -3.305628 9.476385e-04 1.647694e-02
## 17   3.907696      -6.194004 2.1103870 -2.935008 3.335386e-03 3.332233e-02
## 18   1.702005      -5.134653 1.2790219 -4.014515 5.956815e-05 1.856541e-03
## 19   2.235932      -4.159548 1.3855904 -3.002004 2.682088e-03 3.134690e-02
## 20   2.103234      -4.099545 1.3989900 -2.930360 3.385691e-03 3.332233e-02
## 21   5.401424      -2.781896 0.9071836 -3.066520 2.165664e-03 2.901705e-02
## 22 402.124138      -2.687610 0.8767019 -3.065591 2.172399e-03 2.901705e-02
## 23  21.308089      -2.382748 0.6978631 -3.414349 6.393464e-04 1.264485e-02
## 24  96.765775       1.758121 0.5796869  3.032880 2.422316e-03 3.593102e-02
## 25  29.106484       2.412163 0.7495958  3.217952 1.291096e-03 2.298151e-02
## 26 217.573068      26.425441 4.1489050  6.369257 1.899465e-10 1.690524e-08
##             OTU Kingdom.x      Phylum.x     Class.x       Order.x
## 1   Cluster_103  Bacteria    Firmicutes  Clostridia Clostridiales
## 2    Cluster_64  Bacteria    Firmicutes  Clostridia Clostridiales
## 3   Cluster_122  Bacteria    Firmicutes  Clostridia Clostridiales
## 4   Cluster_455  Bacteria    Firmicutes  Clostridia Clostridiales
## 5     Cluster_8  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 6    Cluster_62  Bacteria    Firmicutes  Clostridia Clostridiales
## 7   Cluster_290  Bacteria    Firmicutes  Clostridia Clostridiales
## 8   Cluster_744  Bacteria    Firmicutes  Clostridia Clostridiales
## 9    Cluster_25  Bacteria    Firmicutes  Clostridia Clostridiales
## 10  Cluster_383  Bacteria    Firmicutes  Clostridia Clostridiales
## 11   Cluster_12  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 12  Cluster_236  Bacteria    Firmicutes  Clostridia Clostridiales
## 13  Cluster_159  Bacteria    Firmicutes  Clostridia Clostridiales
## 14  Cluster_162  Bacteria    Firmicutes  Clostridia Clostridiales
## 15  Cluster_160  Bacteria    Firmicutes  Clostridia Clostridiales
## 16   Cluster_70  Bacteria    Firmicutes  Clostridia Clostridiales
## 17  Cluster_978  Bacteria    Firmicutes  Clostridia Clostridiales
## 18 Cluster_1110  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 19  Cluster_902  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 20   Cluster_42  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 21  Cluster_205  Bacteria    Firmicutes  Clostridia Clostridiales
## 22    Cluster_5  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 23   Cluster_95  Bacteria    Firmicutes  Clostridia Clostridiales
## 24   Cluster_28  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 25   Cluster_78  Bacteria    Firmicutes  Clostridia Clostridiales
## 26   Cluster_46  Bacteria    Firmicutes  Clostridia Clostridiales
##                         Family.x                       Genus.x Species.x
## 1  Clostridiales vadinBB60 group                       Unknown   Unknown
## 2                Lachnospiraceae                       Unknown   Unknown
## 3                Lachnospiraceae                 GCA-900066575   Unknown
## 4                Ruminococcaceae                       Unknown   Unknown
## 5                 Muribaculaceae                       Unknown   Unknown
## 6  Clostridiales vadinBB60 group                       Unknown   Unknown
## 7                Lachnospiraceae                       Unknown   Unknown
## 8                Lachnospiraceae                       Unknown   Unknown
## 9                Lachnospiraceae Lachnospiraceae NK4A136 group   Unknown
## 10               Lachnospiraceae                       Unknown   Unknown
## 11                Muribaculaceae                       Unknown   Unknown
## 12               Lachnospiraceae                       Unknown   Unknown
## 13               Lachnospiraceae                       Unknown   Unknown
## 14               Lachnospiraceae                       Unknown   Unknown
## 15               Lachnospiraceae                       Unknown   Unknown
## 16               Lachnospiraceae                Marvinbryantia   Unknown
## 17               Lachnospiraceae                       Unknown   Unknown
## 18                Muribaculaceae                       Unknown   Unknown
## 19                Muribaculaceae                       Unknown   Unknown
## 20                Muribaculaceae                       Unknown   Unknown
## 21               Ruminococcaceae                Butyricicoccus   Unknown
## 22                Prevotellaceae                Alloprevotella   Unknown
## 23               Lachnospiraceae Lachnospiraceae NK4A136 group   Unknown
## 24                Muribaculaceae                       Unknown   Unknown
## 25               Lachnospiraceae Lachnospiraceae NK4A136 group   Unknown
## 26               Ruminococcaceae       Ruminococcaceae UCG-014   Unknown
##    Kingdom.y      Phylum.y     Class.y       Order.y
## 1   Bacteria    Firmicutes  Clostridia Clostridiales
## 2   Bacteria    Firmicutes  Clostridia Clostridiales
## 3   Bacteria    Firmicutes  Clostridia Clostridiales
## 4   Bacteria    Firmicutes  Clostridia Clostridiales
## 5   Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 6   Bacteria    Firmicutes  Clostridia Clostridiales
## 7   Bacteria    Firmicutes  Clostridia Clostridiales
## 8   Bacteria    Firmicutes  Clostridia Clostridiales
## 9   Bacteria    Firmicutes  Clostridia Clostridiales
## 10  Bacteria    Firmicutes  Clostridia Clostridiales
## 11  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 12  Bacteria    Firmicutes  Clostridia Clostridiales
## 13  Bacteria    Firmicutes  Clostridia Clostridiales
## 14  Bacteria    Firmicutes  Clostridia Clostridiales
## 15  Bacteria    Firmicutes  Clostridia Clostridiales
## 16  Bacteria    Firmicutes  Clostridia Clostridiales
## 17  Bacteria    Firmicutes  Clostridia Clostridiales
## 18  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 19  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 20  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 21  Bacteria    Firmicutes  Clostridia Clostridiales
## 22  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 23  Bacteria    Firmicutes  Clostridia Clostridiales
## 24  Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 25  Bacteria    Firmicutes  Clostridia Clostridiales
## 26  Bacteria    Firmicutes  Clostridia Clostridiales
##                         Family.y                       Genus.y Species.y
## 1  Clostridiales vadinBB60 group                       Unknown   Unknown
## 2                Lachnospiraceae                       Unknown   Unknown
## 3                Lachnospiraceae                 GCA-900066575   Unknown
## 4                Ruminococcaceae                       Unknown   Unknown
## 5                 Muribaculaceae                       Unknown   Unknown
## 6  Clostridiales vadinBB60 group                       Unknown   Unknown
## 7                Lachnospiraceae                       Unknown   Unknown
## 8                Lachnospiraceae                       Unknown   Unknown
## 9                Lachnospiraceae Lachnospiraceae NK4A136 group   Unknown
## 10               Lachnospiraceae                       Unknown   Unknown
## 11                Muribaculaceae                       Unknown   Unknown
## 12               Lachnospiraceae                       Unknown   Unknown
## 13               Lachnospiraceae                       Unknown   Unknown
## 14               Lachnospiraceae                       Unknown   Unknown
## 15               Lachnospiraceae                       Unknown   Unknown
## 16               Lachnospiraceae                Marvinbryantia   Unknown
## 17               Lachnospiraceae                       Unknown   Unknown
## 18                Muribaculaceae                       Unknown   Unknown
## 19                Muribaculaceae                       Unknown   Unknown
## 20                Muribaculaceae                       Unknown   Unknown
## 21               Ruminococcaceae                Butyricicoccus   Unknown
## 22                Prevotellaceae                Alloprevotella   Unknown
## 23               Lachnospiraceae Lachnospiraceae NK4A136 group   Unknown
## 24                Muribaculaceae                       Unknown   Unknown
## 25               Lachnospiraceae Lachnospiraceae NK4A136 group   Unknown
## 26               Ruminococcaceae       Ruminococcaceae UCG-014   Unknown
#write.xlsx(res,"Abondances différentielles globales TF.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")
#write.table(res, "Abondances différentielles globales TF.csv", row.names=FALSE, sep="\t",dec=",", na=" ")

TABLEAU CI DESSUS AU FORMAT EXCEL DANS LE FICHIER “TABLEAU DE VALEURS” JOINT AVEC CE DOCUMENT

7.1 Heatmap des taxas prévalents avec pval=0.05

Représentation de la table d’abondance. Montre les intéractions entre (les groupes) de taxa et (les groupes) d’échantillons. Elle essaie de trouver un ordre des echantillons et des OTU pour faire sortir une info interessante.

## On rajoute une fausse colonne nomm?e "my_rank" dans la table taxonomique dans laquelle on a concaténé nom de famille et nom de cluster
my.da.otus <- frogs.data.rare
tdf <- tax_table(frogs.data.rare)
tdf <- cbind(tdf, my_rank = paste0(tdf[, "Family"], " (", taxa_names(frogs.data.rare), ")"))
tax_table(my.da.otus) <- tdf

#Ensuite tu peux appeler plot_heatmap sur my.data.otus comme avant en remplaçant taxa.label = "Family" par taxa.label = #"my_rank"
#Script ci dessous pour avoir ordre des échantillons par ordre croissant:

#p<-plot_heatmap(prune_taxa(da.otus$OTU,my.da.otus) , taxa.order = da.otus$OTU,sample.order=sample_names(my.da.otus),low #= "yellow", high = "red", na.value = "white", title = "DAOTUs",taxa.label = "my_rank")+facet_grid(". ~ Groupe", scales #= "free_x") 
#p<-p+ theme(axis.text.x = element_text(color="black", size=10, angle=90),axis.text.y = element_text(color="black", #size=10))
#plot(p)

#Script avec ordre pour info interessante et non pas ordre grandeur sample             na.value= valeur manquante
p<-plot_heatmap(prune_taxa(da.otus$OTU,my.da.otus) ,low = "yellow", high = "red", na.value = "white", title = "DAOTUs TF",taxa.label = "my_rank")+facet_grid(". ~ Group", scales = "free_x") 
p<-p+ theme(axis.text.x = element_text(color="black", size=10, angle=90),axis.text.y = element_text(color="black", size=10))
plot(p)

#ggplotly(p)
#Figure 8: Heatmap avec dendrogram et clustering sample
#library( "genefilter" )
#topVarGenes <- head( order( rowVars( da.otus ), decreasing=TRUE ), 35 ) #35 sample avec variance la plus haute
#heatmap.2( assay(rld)[ topVarGenes, ], scale="row",
#     trace="none", dendrogram="column",
#     col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))

7.2 Heatmap par Family ou Genus

# p <- plot_heatmap(prune_taxa(names(sort(taxa_sums(data.rare), decreasing = TRUE)[1:49]), data.rare), distance = "bray", method = "MDS", low = "yellow", high = "red", na.value = "white", title = "Taxa heatmap by samples",taxa.label = "Genus")
# p <- p + facet_grid(". ~ Group", scales = "free_x")
# 
# plot(p)
# p <- plot_heatmap(prune_taxa(names(sort(taxa_sums(data.rare), decreasing = TRUE)[1:49]), data.rare), distance = "bray", method = "MDS", low = "yellow", high = "red", na.value = "white", title = "Taxa heatmap by samples",taxa.label = "Family")
# p <- p + facet_grid(". ~ Type", scales = "free_x")
# 
# plot(p)

7.3 Boxplot des taxas prevalents avec pval=0.05 et min.abundance = 30

La fonction build_plotdata_daotus ne conserve que les OTUs qui ont un nombre total de reads supérieur à 30. C’est pour se débarasser des OTUs dont l’abondance est 0 dans le groupe A et éventuellement 2 ou 3 dans le groupe B. Ils sont statistiquement significatifs mais potentiellement pas très intéressants.

plotdata <- build_plotdata_daotus(da.otus, filtered.data, min.abundance = 50)

p <- ggplot(plotdata, aes(x = Group, y = Abundance)) + geom_boxplot(aes(group = Group, fill = Group)) + theme_bw() + theme(legend.position = "none") + facet_wrap(~OTU2, ncol = 4, scales = "free_y")+ theme(axis.text.x = element_text(angle=90,vjust = 1,hjust = 1,size=15)) + stat_compare_means(method = "anova", hide.ns = TRUE,vjust = 0.5) # https://www.datanovia.com/en/fr/lessons/anova-dans-r/  Site sur l'anova et test post hoc

family.palette<- c( 
                   "A"           = "#09C51F", 
                   "B"          = "#8BE9F9", 
                   "C"           = "#0995C5", 
                   "D"          = "#094DC5")
p<-p+scale_color_manual(values = family.palette)+ scale_fill_manual(breaks = c( "A","B", "C", "D"), 
                       values=c("#09C51F","#8BE9F9", "#0995C5", "#094DC5"))
#ggplotly(p)
plot(p)

8 Abondances différentielles de l’effet du régime entre 2 groupes

OTUs différentiellement abondants entre 2 régimes.

#Fonctions auxiliaires g?n?riques qui sont utilis?es pour toutes les analyses (?vite la r?p?titon de blocs de code)

## Extraction des OTUs diff?rentiels et ajout des informations taxonomiques
extract_daotus <- function(cond1, cond2, physeq, pval = 0.05) { #CHANGE IF YOU WANT
  res <- DESeq2::results(dds, contrast=c("Group", cond1, cond2)) %>% as.data.frame() %>% 
    mutate(OTU = taxa_names(physeq)) %>% filter(padj < pval) %>% 
    inner_join(tax_table(physeq) %>% as("matrix") %>% as_tibble(rownames = "OTU"), by = "OTU") %>%
    arrange(log2FoldChange)
  return(res)
}
## Construction du tableau de donn?es pour repr?sentation graphique
build_plotdata_daotus <- function(da.otus, physeq, min.abundance = 50) {   #CHANGE IF YOU WANT
  da.data <- rarefy_even_depth(physeq, rng = 20170329, verbose = FALSE) %>% 
    prune_taxa(da.otus$OTU, .)
  is_abundant <- function(x) { x > min.abundance }
  abundant_taxa <- genefilter_sample(da.data, is_abundant, A = 1)
  da.data <- prune_taxa(abundant_taxa, da.data)
  max.level <- apply(as(tax_table(da.data), "matrix"), 1, function(x) { 
                         y <- x[!is.na(x)]; 
                         y <- y[y != ""];  
                         y <- y[!grepl("unknown", y, ignore.case = T)]
                         y <- y[y != "Multi-affiliation"]
                         # if (names(y)[length(y)] != "Family") {
                         #     return(paste(y["Family"], y[length(y)], sep = "/"))
                         #} else {
                             return(y[length(y)])
                         # } 
                     })
otu.formatted.names <- data.frame(OTU2 = paste0(gsub(pattern = "Cluster_", "", names(max.level)), "_", max.level), 
                          OTU = names(max.level), 
                          stringsAsFactors = FALSE) %>% 
  inner_join(da.otus, by = "OTU") %>% arrange(log2FoldChange)
otu.order <- otu.formatted.names %>% pull(OTU2)
da.df <- merge(psmelt(da.data), otu.formatted.names) %>% mutate(OTU2 = factor(OTU2, levels = otu.order))
return(da.df)
}
## Fonction de heatmap
plot_da_heatmap <- function(da.otus, physeq) {
  ## Build plotdata and order OTU according to lfc
  plotdata <- build_plotdata_daotus(da.otus, physeq, 0) %>% 
    mutate(OTU = factor(OTU, levels = da.otus$OTU))
  
  ## Build vector label
  labvec <- with(da.otus, paste0(Family, " (", OTU, ")"))
  names(labvec) <- da.otus$OTU
  
  ## Automatic text size
  text_size <- function(n, mins = 8, maxs = 30, B = 12, D = 100) {
    s <- B * exp(-n/D)
    s <- ifelse(s > mins, s, mins)
    s <- ifelse(s < maxs, s, maxs)
    return(s)
  }
  
  ggplot(plotdata) + aes(x = Sample, y = OTU, fill = Abundance) + 
    geom_tile() + 
    facet_grid(~ Groupe, scales = "free_x") + 
    labs(y = NULL, x = "Sample") + 
    scale_y_discrete(labels = labvec) +
    scale_fill_gradient(low = "yellow", high = "red", na.value = "white", trans = scales::log_trans(4)) +
    theme(axis.text.x = element_text(angle = 270, size = text_size(nsamples(physeq))), 
          axis.text.y = element_text(size = text_size(nrow(da.otus))))
}
plot_da_boxplot <- function(plotdata) {
   ggplot(plotdata, aes(x = Group, y = Abundance)) + 
    geom_boxplot(aes(group = Group, fill = Group)) + 
    theme_bw() + 
    theme(legend.position = "none") + 
    facet_wrap(~OTU2, ncol = 3, scales = "free_y") + 
    geom_text(aes(y = Inf, x=Inf, label = paste0("p = ", format(pvalue, digits = 4))), size=4, 
              hjust = 1, vjust = 1) 
}

## Function pour le graphique composite
plot_da_composite <- function(da.otus, physeq, labvec = NULL, lfc.threshold = 3, min.abundance = 50) { #CHANGE IF YOU WANT
  ## Custome theme                                               #on peut mettre 3
  custom.theme <- theme_bw() + 
  theme(panel.spacing = unit(0.2, "lines"), 
        axis.title.y = element_blank(), 
        axis.ticks.y = element_blank(), 
        axis.text.y = element_blank(), 
        strip.text.y = element_blank())
  
  ## Plotdata
  plotdata <- build_plotdata_daotus(da.otus, physeq, min.abundance = min.abundance) %>%  
    filter(abs(log2FoldChange) >= lfc.threshold) %>% 
    arrange(log2FoldChange)
  
  ## Correct OTU and Family Order
  otu.order <- da.otus %>% pull(OTU)
  fam.order <- da.otus %>% select(Phylum, Family) %>% unique() %>% arrange(Phylum, Family) %>% pull(Family)
  plotdata <- plotdata %>% 
    mutate(OTU       = factor(OTU, levels = otu.order), 
           Family    = factor(Family, levels = fam.order), 
           OTU.label = if_else(Genus != "Unknown", Genus, NA_character_))

  ## Annotation data
  annotate.data <- plotdata %>% group_by(Family) %>% 
    summarize(OTU = length(unique(OTU))) %>% 
    mutate(label = factor(Family, levels = fam.order), 
           log2FoldChange = min(plotdata$log2FoldChange) * 1.1
           ## log2FoldChange = -Inf
    )
  ## Plots
  p.lfc <- ggplot(plotdata, aes(y = OTU, x = log2FoldChange, color = Family)) + 
    geom_point() + 
    geom_vline(xintercept = 0, linetype = 2) + 
    facet_grid(Family~., space = "free_y", scale = "free_y", switch = "both") + 
    geom_text(data = annotate.data, aes(label = label), hjust = 0, vjust = 1) + #vjust=1 c'est le point au dessus du nom family
    geom_text(aes(label = OTU.label, hjust="inward", vjust=1),size=3.5) #size=3 taille nom family de couleur 
  #vjust=0.5 Noms family à coté point
  
  p.evidence <- ggplot(plotdata, aes(x = OTU, y = -log10(padj), fill = Family)) + geom_bar(stat = "identity") +
    facet_grid(Family~., space = "free_y", scale = "free_y") + 
    coord_flip() + ylab("Evidence")
  
  if (is.null(labvec)) {
    labvec <- setNames(nm = unique(plotdata$Group))
  }
  hm.data <- plotdata %>% group_by_at(vars(-Sample, -Name, -Abundance)) %>% 
    summarize(Abundance = mean(Abundance)) %>% 
    ungroup() %>% 
    mutate(OTU    = factor(OTU, levels = otu.order), 
           Family = factor(Family, levels =fam.order),
           Group = factor(Group, levels = names(labvec), labels = labvec)) 
  
  p.hm <- ggplot(hm.data, aes(y = OTU, x = Group, fill = Abundance)) + geom_tile() + 
    facet_grid(Family~., space = "free_y", scale = "free_y") + #Heatmap séparée par Family
    scale_fill_gradient(trans = scales::log_trans(base = 2), na.value = "white", 
                        low = "white", high = "black") 
  
  p.lfc.2 <- p.lfc 
  p.lfc.2$layers[[3]] <- NULL
  p1 <- arrangeGrob(p.lfc.2 + custom.theme +
                      theme(legend.position = "none", 
                            strip.text.y = element_text( angle = 180,hjust = 1, size = 10), #Noms Family sur la gauche   
                            strip.background = element_blank()), 
                    p.evidence + custom.theme +
                      scale_x_discrete(position = "top") + 
                      theme(legend.position = "none", 
                            axis.text.y = element_text(size = 8)), 
                    p.hm + custom.theme,
                    widths = c(0.47, 0.38, 0.15), #taille largeur des 3 cadres doit être =1
                    nrow = 1, 
                    ncol = 3)
  grid::grid.newpage()
  grid::grid.draw(p1)
  invisible(p1)
}

##############################################################
######## graphique composite 2 ###############################

format_names <- function(physeq, min.rank = 0) {
  tax <- as(tax_table(physeq), "matrix")
  tax[tax %in% c("Unknown", "Multi-Affiliation")] <- NA
  final_rank <- function(x) {
    x <- x[!is.na(x)]
    if (length(x) <= min.rank) {
      return("") 
    } else {
      tail(x, 1)
    }
  }
  apply(tax, 1, final_rank)
}

## Function pour le graphique composite
plot_da_compositebis <- function(da.otus, physeq, labvec = NULL, lfc.threshold = 3, min.abundance = 50) { #CHANGE IF YOU WANT
  
  ## Custome theme                                               #on peut mettre 3 ou 2
  custom.theme <- theme_bw() + 
  theme(panel.spacing = unit(0.2, "lines"), 
        axis.title.y = element_blank(), 
        axis.ticks.y = element_blank(), 
        axis.text.y = element_blank(), 
        strip.text.y = element_blank(),
        axis.text.x = element_text(size = 10)) #taille noms groupes heatmap
  
  ## Plotdata
  plotdata <- build_plotdata_daotus(da.otus, physeq, min.abundance = min.abundance) %>%  
    inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>% 
               mutate(OTU   = taxa_names(data),
                      OTU.label = format_names(data)), 
             by = "OTU") %>%
  filter(abs(log2FoldChange) >= 3) %>% #On peut changer la valeur du log2FoldChange =3 ou 2 #CHANGE IF YOU WANT
    arrange(log2FoldChange)
  
 
##### Correct OTU and Family Order Ajout Mahendra
otu.order <- da.otus %>% pull(OTU)
fam.order <- da.otus %>% select(Phylum, Family) %>% unique() %>% arrange(Phylum, Family) %>% pull(Family)
plotdata <- plotdata %>%
   ## Ce que tu veux rajouter
   inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>%
                   mutate(OTU         = taxa_names(data),
                   OTU.label   = format_names(data),
                   OTU.label.2 = format_names(data, min.rank = 5)),
                   by = "OTU") %>%
  ## Tri des OTUs par log fold change décroissant et des familles par ordre alphabétique au sein des phylums
          mutate(OTU       = factor(OTU, levels = otu.order), 
           Family    = factor(Family, levels = fam.order))

 ## Plots
#br <- seq(-20, 10, by = 2)
  #p.lfc <- ggplot(plotdata, aes(y = OTU, x = 2^log2FoldChange, color = Family)) +
  #geom_point(aes(size = 1)) + geom_vline(xintercept = 1, linetype = 2) + #size=1 taille point et noms family
  #geom_text(aes(x = 2^(log2FoldChange - .25*sign(log2FoldChange)),
  #              label = OTU.label.2, size = 1, #legende des tests PCR
  #              hjust = if_else(log2FoldChange > 0, 1, 0))) +
  #  scale_color_brewer(palette = "Paired") +  #scale_color_brewer(palette="Paired") si plus de 10 Family
 # scale_x_continuous(trans = "log2", breaks =  2^br,
  #                   labels = c(paste0("1/", 2^-br[br <0]), 2^br[br>= 0]),
  #                   name = "Fold change (log scale)") +
  #scale_y_discrete(position = "right")

#library(paletteer)


p.lfc <- ggplot(plotdata, aes(y = OTU, x = log2FoldChange, color = Family)) +
  geom_point(size=5) + geom_vline(xintercept = 0, linetype = 2) + #size=1 taille point et noms family
  geom_text(aes(x = log2FoldChange - .25*sign(log2FoldChange),
                label = OTU.label.2, vjust=0.5, #Taille et position noms family à côté des points
               hjust = if_else(log2FoldChange > 0, 1, 0)))+
#  scale_color_paletteer_d("bpalette")+
  scale_color_brewer(palette = "Paired") +  #scale_color_brewer(palette="Paired") si plus de 10 Family
  scale_y_discrete(position = "right")
  # + scale_x_continuous(limits = c(-15, 35))   ######## pour changer echelle axe des x (log2foldchange)

#Heatmap
hm.data <- plotdata %>% group_by_at(vars(-Sample, -Name, -Abundance)) %>% 
    summarize(Abundance = mean(Abundance)) %>% 
    ungroup() %>% 
    mutate(OTU    = factor(OTU, levels = otu.order), 
           Family = factor(Family, levels =fam.order),
           Group = factor(Group, levels = names(labvec), labels = labvec))

p.hm <- ggplot(hm.data, aes(y = OTU, x = Group, fill = Abundance)) + geom_tile() + 
  scale_fill_gradient(trans = log_trans(base = 2), na.value = "white", 
                      low = "white", high = "black") 
  
#
p2 <- arrangeGrob(p.lfc + custom.theme + guides(colour = guide_legend(override.aes = list(size=5)))+ #taille points de la legende
                      theme(legend.position = c(0.025, 0.975), ########### OU theme(legend.position = c(0.575, 0.425) ##################
                          legend.justification = c(0, 1), 
                          legend.text = element_text(size = 16), #taille noms legende
                          legend.background = element_rect(fill = "transparent"),
                          #axis.text.y = element_text(size = 9, hjust = 0.5),  #Affichage noms des clusters
                          axis.text.x = element_text(size = 10)), #taille noms legende axe des x
                  
                    p.hm + custom.theme, #p.hm défini la heatmap
                  widths = c(0.80, 0.20), #largeur heatmap
                  nrow = 1, 
                  ncol = 2)
  
  grid::grid.newpage()
  grid::grid.draw(p2)
  invisible(p2)
}


##############  Function pour le graphique composite TER
plot_da_compositeter <- function(da.otus, physeq, labvec = NULL, lfc.threshold = 3, min.abundance = 50) { #CHANGE IF YOU WANT
  
  ## Custome theme                                               #on peut mettre 3 ou 2
  custom.theme <- theme_bw() + 
  theme(panel.spacing = unit(0.2, "lines"), 
        axis.title.y = element_blank(), 
        axis.ticks.y = element_blank(), 
        axis.text.y = element_blank(), 
        strip.text.y = element_blank(),
        axis.text.x = element_text(size = 10)) #taille noms groupes heatmap
  
  ## Plotdata
  plotdata <- build_plotdata_daotus(da.otus, physeq, min.abundance = min.abundance) %>%  
    inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>% 
               mutate(OTU   = taxa_names(data),
                      OTU.label = format_names(data)), 
             by = "OTU") %>%
  filter(abs(log2FoldChange) >= 3) %>% #On peut changer la valeur du log2FoldChange =3 ou 2 #CHANGE IF YOU WANT
    arrange(log2FoldChange)
  
 
##### Correct OTU and Family Order Ajout Mahendra
otu.order <- da.otus %>% pull(OTU)
fam.order <- da.otus %>% select(Phylum, Family) %>% unique() %>% arrange(Phylum, Family) %>% pull(Family)
plotdata <- plotdata %>%
   ## Ce que tu veux rajouter
   inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>%
                   mutate(OTU         = taxa_names(data),
                   OTU.label   = format_names(data),
                   OTU.label.2 = format_names(data, min.rank = 5)),
                   by = "OTU") %>%
  ## Tri des OTUs par log fold change décroissant et des familles par ordre alphabétique au sein des phylums
          mutate(OTU       = factor(OTU, levels = otu.order), 
           Family    = factor(Family, levels = fam.order))

 ## Plots
#br <- seq(-20, 10, by = 2)
  #p.lfc <- ggplot(plotdata, aes(y = OTU, x = 2^log2FoldChange, color = Family)) +
  #geom_point(aes(size = 1)) + geom_vline(xintercept = 1, linetype = 2) + #size=1 taille point et noms family
  #geom_text(aes(x = 2^(log2FoldChange - .25*sign(log2FoldChange)),
  #              label = OTU.label.2, size = 1, #legende des tests PCR
  #              hjust = if_else(log2FoldChange > 0, 1, 0))) +
  #  scale_color_brewer(palette = "Paired") +  #scale_color_brewer(palette="Paired") si plus de 10 Family
 # scale_x_continuous(trans = "log2", breaks =  2^br,
  #                   labels = c(paste0("1/", 2^-br[br <0]), 2^br[br>= 0]),
  #                   name = "Fold change (log scale)") +
  #scale_y_discrete(position = "right")

#library(paletteer)

p.lfc <- ggplot(plotdata, aes(y = OTU, x = log2FoldChange, color = Family)) +
  geom_point(size=5) + geom_vline(xintercept = 0, linetype = 2) + #size=1 taille point et noms family
  geom_text(aes(x = log2FoldChange - .25*sign(log2FoldChange),
                label = OTU.label.2, vjust=0.5, #Taille et position noms family à côté des points
               hjust = if_else(log2FoldChange > 0, 1, 0)))+
#  scale_color_paletteer_d("bpalette")+
  scale_color_brewer(palette = "Paired") +  #scale_color_brewer(palette="Paired") si plus de 10 Family
  scale_y_discrete(position = "right") + scale_x_continuous(limits = c(-15, 30))   ######## pour changer echelle axe des x (log2foldchange)
  

#Heatmap
hm.data <- plotdata %>% group_by_at(vars(-Sample, -Name, -Abundance)) %>% 
    summarize(Abundance = mean(Abundance)) %>% 
    ungroup() %>% 
    mutate(OTU    = factor(OTU, levels = otu.order), 
           Family = factor(Family, levels =fam.order),
           Group = factor(Group, levels = names(labvec), labels = labvec))

p.hm <- ggplot(hm.data, aes(y = OTU, x = Group, fill = Abundance)) + geom_tile() + 
  scale_fill_gradient(trans = log_trans(base = 2), na.value = "white", 
                      low = "white", high = "black") 
  
#
p2 <- arrangeGrob(p.lfc + custom.theme + guides(colour = guide_legend(override.aes = list(size=5)))+ #taille points de la legende
                      theme(legend.position = c(0.575, 0.525), ########### OU theme(legend.position = c(0.575, 0.425) ##################
                          legend.justification = c(0, 1), 
                          legend.text = element_text(size = 16), #taille noms legende
                          legend.background = element_rect(fill = "transparent"),
                          #axis.text.y = element_text(size = 9, hjust = 0.5),  #Affichage noms des clusters
                          axis.text.x = element_text(size = 10)), #taille noms legende axe des x
                  
                    p.hm + custom.theme, #p.hm défini la heatmap
                  widths = c(0.80, 0.20), #largeur heatmap
                  nrow = 1, 
                  ncol = 2)
  
  grid::grid.newpage()
  grid::grid.draw(p2)
  invisible(p2)
}

################################################################

## tester la présence d'un OTU
is_present <- function(comptage) {comptage > 0} 

8.1 1- Comparaisons entre les groupes “A” et “B” à TF

On répète l’analyse d’abondance différentielle précédente en ne prenant que les échantillons “Chow” et “HFPalm” en utilisant la même p-valeur ajustée, pval=0.05

Nous avons 12 échantillons.

On commence par sélectionner les échantillons

data <- frogs.data %>% subset_samples(Group %in% c("A","B"))

Puis on ne conserve que les taxas prévalents

prevalent_taxa <- genefilter_sample(data, is_present, A = 2) #25% des echantillons
filtered.data <- prune_taxa(prevalent_taxa, data)

On fait ensuite l’analyse d’abondance différentielle avec DESeq2

cds <- phyloseq_to_deseq2(filtered.data, design = ~ Group)
dds <- DESeq2::DESeq(cds)

8.1.0.1 Table des OTUs différentiels entre ces 2 groupes avec pval=0.05

da.otus <- extract_daotus("A","B", filtered.data, pval = 0.05) 
da.otus 
##      baseMean log2FoldChange     lfcSE      stat       pvalue         padj
## 1   46.271680      -9.075573 1.4450227 -6.280575 3.373236e-10 2.951582e-08
## 2   18.863758      -7.296419 1.5552303 -4.691536 2.711614e-06 1.186331e-04
## 3   13.224601      -6.781594 1.6302812 -4.159769 3.185691e-05 9.291598e-04
## 4    7.717996      -6.492563 1.9217697 -3.378429 7.290119e-04 9.112648e-03
## 5    8.111684      -6.070274 1.8941783 -3.204701 1.352030e-03 1.575652e-02
## 6    3.929734      -5.520146 1.2298744 -4.488382 7.176625e-06 2.511819e-04
## 7   10.397043      -4.937106 1.3096854 -3.769689 1.634512e-04 3.575495e-03
## 8   39.622790      -4.821684 1.3860675 -3.478679 5.038924e-04 7.348430e-03
## 9    3.684448      -4.417765 1.5201683 -2.906103 3.659614e-03 3.049678e-02
## 10   2.390621      -4.300409 1.1653326 -3.690285 2.240033e-04 4.355621e-03
## 11  74.521857      -4.289727 0.8135819 -5.272643 1.344729e-07 7.844251e-06
## 12   3.876456      -3.902242 1.2246589 -3.186391 1.440596e-03 1.575652e-02
## 13 168.417425      -3.841147 0.5456952 -7.038997 1.936282e-12 3.388494e-10
## 14   2.332818      -3.680933 1.1886545 -3.096723 1.956727e-03 1.902374e-02
## 15  19.626578      -3.525414 0.9298623 -3.791329 1.498432e-04 3.575495e-03
## 16   7.923295      -2.983064 0.8671563 -3.440054 5.815992e-04 7.829220e-03
## 17 279.087246      -1.659972 0.4682697 -3.544905 3.927539e-04 6.873194e-03
## 18  17.017952       1.748375 0.6144782  2.845300 4.436959e-03 3.529399e-02
## 19  75.707869       1.921791 0.6142047  3.128909 1.754568e-03 1.806173e-02
## 20 166.331913       2.336232 0.6645127  3.515707 4.385846e-04 6.977483e-03
## 21  90.947920       3.075570 1.0302121  2.985376 2.832304e-03 2.608701e-02
## 22  10.935456       3.763516 1.2855825  2.927480 3.417215e-03 2.990063e-02
##            OTU  Kingdom        Phylum       Class         Order
## 1  Cluster_103 Bacteria    Firmicutes  Clostridia Clostridiales
## 2   Cluster_64 Bacteria    Firmicutes  Clostridia Clostridiales
## 3  Cluster_122 Bacteria    Firmicutes  Clostridia Clostridiales
## 4  Cluster_288 Bacteria    Firmicutes  Clostridia Clostridiales
## 5   Cluster_91 Bacteria    Firmicutes  Clostridia Clostridiales
## 6  Cluster_455 Bacteria    Firmicutes  Clostridia Clostridiales
## 7    Cluster_8 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 8   Cluster_62 Bacteria    Firmicutes  Clostridia Clostridiales
## 9  Cluster_290 Bacteria    Firmicutes  Clostridia Clostridiales
## 10 Cluster_744 Bacteria    Firmicutes  Clostridia Clostridiales
## 11  Cluster_25 Bacteria    Firmicutes  Clostridia Clostridiales
## 12 Cluster_383 Bacteria    Firmicutes  Clostridia Clostridiales
## 13  Cluster_12 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 14 Cluster_236 Bacteria    Firmicutes  Clostridia Clostridiales
## 15 Cluster_159 Bacteria    Firmicutes  Clostridia Clostridiales
## 16 Cluster_162 Bacteria    Firmicutes  Clostridia Clostridiales
## 17   Cluster_5 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 18 Cluster_160 Bacteria    Firmicutes  Clostridia Clostridiales
## 19  Cluster_40 Bacteria    Firmicutes  Clostridia Clostridiales
## 20  Cluster_39 Bacteria    Firmicutes  Clostridia Clostridiales
## 21  Cluster_76 Bacteria    Firmicutes  Clostridia Clostridiales
## 22 Cluster_351 Bacteria    Firmicutes  Clostridia Clostridiales
##                           Family                         Genus Species
## 1  Clostridiales vadinBB60 group                       Unknown Unknown
## 2                Lachnospiraceae                       Unknown Unknown
## 3                Lachnospiraceae                 GCA-900066575 Unknown
## 4                Lachnospiraceae Lachnospiraceae NK4A136 group Unknown
## 5                Ruminococcaceae       Ruminococcaceae UCG-014 Unknown
## 6                Ruminococcaceae                       Unknown Unknown
## 7                 Muribaculaceae                       Unknown Unknown
## 8  Clostridiales vadinBB60 group                       Unknown Unknown
## 9                Lachnospiraceae                       Unknown Unknown
## 10               Lachnospiraceae                       Unknown Unknown
## 11               Lachnospiraceae Lachnospiraceae NK4A136 group Unknown
## 12               Lachnospiraceae                       Unknown Unknown
## 13                Muribaculaceae                       Unknown Unknown
## 14               Lachnospiraceae                       Unknown Unknown
## 15               Lachnospiraceae                       Unknown Unknown
## 16               Lachnospiraceae                       Unknown Unknown
## 17                Prevotellaceae                Alloprevotella Unknown
## 18               Lachnospiraceae                       Unknown Unknown
## 19               Lachnospiraceae                       Blautia Unknown
## 20               Lachnospiraceae                       Unknown Unknown
## 21               Lachnospiraceae                       Unknown Unknown
## 22               Lachnospiraceae       Lachnospiraceae UCG-006 Unknown
#write.xlsx(da.otus,"TF Chow HFPalm.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")
#write.table(da.otus, "Control et Ind_Inf_Int.csv", row.names=FALSE, sep="\t",dec=",", na=" ")

TABLEAU CI DESSUS AU FORMAT EXCEL DANS LE FICHIER “TABLEAU DE VALEURS” JOINT AVEC CE DOCUMENT

Le nombre d’OTUs DA en fonction de la p-valeur est represente ci-dessous:

ggplot(da.otus %>% arrange(padj) %>% mutate(Nb = 1:n()), 
       aes(x = padj, y = Nb)) + 
  geom_point() + 
  scale_x_log10(breaks  = 10^(-seq(2, 41, 3))) +
  labs(x = "p valeur ajustée", y = "Nombre d'OTUs DA")

8.1.1 Heatmap avec pval=0.05

plot_da_heatmap(da.otus, data) + ggtitle("DAOTUs[A-B] ")

8.1.2 Boxplot des OTUs prévalents avec pval=0.05 et min.abundance = 50

plotdata <- build_plotdata_daotus(da.otus, data, min.abundance = 50)  
plot_da_boxplot(plotdata)

Représentation en boxplot des OTUs differentiellements abondants entre les groupe HFD et HFD_Antibiotiques en fonction du Fold Change

plotdata<-inner_join(da.otus,psmelt(data))%>%arrange(log2FoldChange)%>%filter(Group%in%c("A","B"))
ggplot(plotdata,aes(x=Group,y=Abundance,fill=Phylum,color=Phylum,alpha=Group))+scale_y_log10()+scale_alpha_discrete(range=c(0,1),guide=guide_legend(override.aes = list(fill="black")))+facet_wrap(~Phylum)+scale_y_log10()+geom_boxplot()  

# Ou ceci:
#plotdata<-inner_join(res,psmelt(frogs.data.rare))%>%arrange(log2FoldChange)%>%filter(Groupe%in%c("TEMOIN", "COLZA_SOJA"))
#ggplot(plotdata,aes(x=Groupe,y=Abundance))+facet_wrap(~Phylum)+scale_y_log10()+geom_boxplot(aes(group = Groupe, fill = Groupe))
plotdata<-inner_join(da.otus,psmelt(data))%>%arrange(log2FoldChange)%>%filter(Group%in%c("A","B"))
ggplot(plotdata,aes(x=Group,y=Abundance,fill=Phylum,color=Phylum,alpha=Group))+scale_y_log10()+scale_alpha_discrete(range=c(0,1),guide=guide_legend(override.aes = list(fill="black")))+facet_wrap(~Family)+scale_y_log10()+geom_boxplot()

p<-ggplot(plotdata,aes(x=OTU,y=Abundance,fill=Phylum,color=Phylum,alpha=Group))+scale_y_log10()+scale_alpha_discrete(range=c(0,1),guide=guide_legend(override.aes = list(fill="black")))+theme(axis.text.x = element_text(angle=90,vjust = 1,hjust = 1))+geom_boxplot()
#ggplotly(p)
plot(p)

# p<-ggplot(plotdata, aes(x=OTU2,y=Abundance,fill=Phylum,color=Phylum,alpha=Group)) +
#  scale_y_log10() +
#  scale_alpha_discrete(range=c(0,1), guide=guide_legend(override.aes = list(fill="black"))) +
#  theme(axis.text.x = element_text(angle=90,vjust = 1,hjust = 1,size=12)) +
#  geom_boxplot()+ geom_text(aes(y = 1000, label = paste0("p = ", format(pvalue)),angle=90))
# #ggplotly(p)
# plot(p)
# plotdata<-inner_join(da.otus,psmelt(data))%>%arrange(log2FoldChange)%>%filter(Group%in%c("Chow","HFPalm"))
# ggplot(plotdata,aes(x=Group,y=Abundance,fill=Phylum,color=Phylum,alpha=Group))+scale_y_log10()+scale_alpha_discrete(range=c(0,1),guide=guide_legend(override.aes = list(fill="black")))+facet_wrap(~Family)+scale_y_log10()+geom_boxplot()
#p<-ggplot(plotdata, aes(x=OTU2,y=Abundance,fill=Family,color=Family,alpha=Groupe)) + 
#  scale_y_log10() +
#  scale_alpha_discrete(range=c(0,1), guide=guide_legend(override.aes = list(fill="black"))) +
#  theme(axis.text.x = element_text(angle=90,vjust = 1,hjust = 1,size=12)) +
#  geom_boxplot()+ geom_text(aes(y = 400, label = paste0("p = ", format(pvalue)),angle=90))
#ggplotly(p)
#plot(p)

8.1.3 Représentations avec pval = 0.05 / min abundance = 50 / log2foldchange = 3

plot_da_composite(da.otus, data, labvec = c("Chow" = "Chow", "HFPalm" = "HFP"), min.abundance = 50, lfc.threshold = 3)  

Graphique sur lequel n’est affiché que la classification d’un OTU que si elle est plus précise que la famille (qui apparait déjà via la couleur)

plot_da_compositeter(da.otus, data, labvec = c("Chow" = "Chow", "HFPalm" = "HFP"), min.abundance = 50, lfc.threshold = 3) 

#ggsave(p, file = "MMDaOTUsT90 HR_2HFDvsNAFLR_2HFD pval=0.001 min abundance=50 log2foldchange=5.png",width = 15, height = 7,dpi=300)

8.1.4 Figure publi Sebastian avec echelle Fold Change (log scale)

format_names <- function(physeq, min.rank = 0) {
  tax <- as(tax_table(physeq), "matrix")
  tax[tax %in% c("Unknown", "Multi-Affiliation")] <- NA
  final_rank <- function(x) {
    x <- x[!is.na(x)]
    if (length(x) <= min.rank) {
      return("") 
    } else {
      tail(x, 1)
    }
  }
  apply(tax, 1, final_rank)
}

## Function pour le graphique composite
plot_da_compositebis <- function(da.otus, physeq, labvec = NULL, lfc.threshold = 3, min.abundance = 50) {
  
  ## Custome theme                                               #on peut mettre 3 ou 2
  custom.theme <- theme_bw() + 
  theme(panel.spacing = unit(0.2, "lines"), 
        axis.title.y = element_blank(), 
        axis.ticks.y = element_blank(), 
        axis.text.y = element_blank(), 
        strip.text.y = element_blank(),
        axis.text.x = element_text(size = 11)) #taille noms groupes heatmap
  
  ## Plotdata
  plotdata <- build_plotdata_daotus(da.otus, physeq, min.abundance = min.abundance) %>%  
    inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>% 
               mutate(OTU   = taxa_names(data),
                      OTU.label = format_names(data)), 
             by = "OTU") %>%
  filter(abs(log2FoldChange) >= 3) %>% #On peut changer la valeur du log2FoldChange =3 ou 2
    arrange(log2FoldChange)
  
 
##### Correct OTU and Family Order Ajout Mahendra
otu.order <- da.otus %>% pull(OTU)
fam.order <- da.otus %>% select(Phylum, Family) %>% unique() %>% arrange(Phylum, Family) %>% pull(Family)
plotdata <- plotdata %>%
   ## Ce que tu veux rajouter
   inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>%
                   mutate(OTU         = taxa_names(data),
                   OTU.label   = format_names(data),
                   OTU.label.2 = if_else(Genus != "Unknown", Genus, NA_character_)), #format_names(data, min.rank = 4) #Pour changer les noms à côté des points
                   by = "OTU") %>%
  ## Tri des OTUs par log fold change décroissant et des familles par ordre alphabétique au sein des phylums
          mutate(OTU       = factor(OTU, levels = otu.order), 
           Family    = factor(Family, levels = fam.order))

 ## Plots

#family.palette<- c("Atopobiaceae"                = "#DE1906", 
#                   "Bacteroidaceae"           = "#217AD4", 
#                   "Rikenellaceae"             = "#1BA728", 
 #                  "Tannerellaceae"                  = "#7C3E9D", 
  #                 "Lachnospiraceae"                 = "#E89018", 
   #                "Ruminococcaceae"             = "#905B03", 
    #               "Desulfovibrionaceae"                 = "#F897CF"
     #              )

br <- seq(-10, 10, by = 2)
  
  p.lfc <- ggplot(plotdata, aes(y = OTU, x = 2^log2FoldChange, color = Family)) +
  geom_point(size=5) + geom_vline(xintercept = 1, linetype = 2) + #size=1 taille point et noms family
  geom_text(aes(x = 2^(log2FoldChange - .25*sign(log2FoldChange)),
                label = OTU.label.2, vjust=0.5, #noms family à côté des points en pas juste en dessous
                hjust = if_else(log2FoldChange > 0, 1, 0))) +
     scale_color_brewer(palette="Paired") +  # scale_color_manual(values = family.palette)  si plus de 10 Family
  scale_x_continuous(trans = "log2", breaks =  2^br,
                     labels = c(paste0("1/", 2^-br[br <0]), 2^br[br>= 0]),
                     name = "Fold change (log scale)") +
  scale_y_discrete(position = "right")


  
#Heatmap
hm.data <- plotdata %>% group_by_at(vars(-Sample, -Name, -Abundance)) %>% 
    summarize(Abundance = mean(Abundance)) %>% 
    ungroup() %>% 
    mutate(OTU    = factor(OTU, levels = otu.order), 
           Family = factor(Family, levels =fam.order),
           Group = factor(Group, levels = names(labvec), labels = labvec))

p.hm <- ggplot(hm.data, aes(y = OTU, x = Group, fill = Abundance)) + geom_tile() + 
  scale_fill_gradient(trans = log_trans(base = 2), na.value = "white", 
                      low = "white", high = "black") 
  
#
p2 <- arrangeGrob(p.lfc + custom.theme + guides(colour = guide_legend(override.aes = list(size=5)))+ #taille points de la legende
                      theme(legend.position = c(0.025, 0.975), 
                          legend.justification = c(0, 1), 
                          legend.text = element_text(size = 16), #taille noms legende
                          legend.background = element_rect(fill = "transparent"),
                          axis.text.y = element_text(size = 9, hjust = 0.5),  #Affichage noms des clusters,
                          axis.text.x = element_text(size = 11)),
                                                  #taille noms legende axe des x
                    p.hm + custom.theme, #p.hm défini la heatmap
                  widths = c(0.83, 0.17), #largeur heatmap
                  nrow = 1, 
                  ncol = 2)
  
  grid::grid.newpage()
  grid::grid.draw(p2)
  invisible(p2)
}

p<-plot_da_compositebis(da.otus, data, labvec = c("Chow" = "Chow", "HFPalm" = "HFP"), min.abundance = 50, lfc.threshold = 3)

#ggsave(p, file = "Plot Comoposite H vs NAFL.png",width = 13, height = 6,dpi=300)